From 1c33cdac1c6dde6ba4cabbbf2be8042f35f143d2 Mon Sep 17 00:00:00 2001 From: "weslley.spereira" Date: Thu, 22 Apr 2021 10:18:18 -0300 Subject: [PATCH 01/90] Fix double routine LAPACKE_dtpmqrt_work --- LAPACKE/src/lapacke_dtpmqrt_work.c | 34 ++++++++++++++++++------------ 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/LAPACKE/src/lapacke_dtpmqrt_work.c b/LAPACKE/src/lapacke_dtpmqrt_work.c index d9ee6226be..0b5b06e87f 100644 --- a/LAPACKE/src/lapacke_dtpmqrt_work.c +++ b/LAPACKE/src/lapacke_dtpmqrt_work.c @@ -48,16 +48,24 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,k); + lapack_int nrowsA, ncolsA, nrowsV; + if ( side == 'L' ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == 'R' ) { nrowsA = m; ncolsA = k; nrowsV = n; } + else { + info = -2; + LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); + return info; + } + lapack_int lda_t = MAX(1,nrowsA); lapack_int ldb_t = MAX(1,m); - lapack_int ldt_t = MAX(1,ldt); - lapack_int ldv_t = MAX(1,ldv); + lapack_int ldt_t = MAX(1,nb); + lapack_int ldv_t = MAX(1,nrowsV); double* v_t = NULL; double* t_t = NULL; double* a_t = NULL; double* b_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if ( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); return info; @@ -67,7 +75,7 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); return info; } - if( ldt < nb ) { + if( ldt < k ) { info = -12; LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); return info; @@ -83,12 +91,12 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; } - t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,nb) ); + t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,k) ); if( t_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } - a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,m) ); + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,ncolsA) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; @@ -99,10 +107,10 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_3; } /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_layout, ldv, k, v, ldv, v_t, ldv_t ); - LAPACKE_dge_trans( matrix_layout, ldt, nb, t, ldt, t_t, ldt_t ); - LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); - LAPACKE_dge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_dge_trans( LAPACK_ROW_MAJOR, nrowsV, k, v, ldv, v_t, ldv_t ); + LAPACKE_dge_trans( LAPACK_ROW_MAJOR, nb, k, t, ldt, t_t, ldt_t ); + LAPACKE_dge_trans( LAPACK_ROW_MAJOR, nrowsA, ncolsA, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( LAPACK_ROW_MAJOR, m, n, b, ldb, b_t, ldb_t ); /* Call LAPACK function and adjust info */ LAPACK_dtpmqrt( &side, &trans, &m, &n, &k, &l, &nb, v_t, &ldv_t, t_t, &ldt_t, a_t, &lda_t, b_t, &ldb_t, work, &info ); @@ -110,8 +118,8 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_dge_trans( LAPACK_COL_MAJOR, k, m, a_t, lda_t, a, lda ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, ncolsA, nrowsA, a_t, lda_t, a, lda ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, m, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); exit_level_3: From 217d935085438369ed4bfe99c814dab5514b03de Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 10 Oct 2021 17:36:57 +0200 Subject: [PATCH 02/90] Update conda before enabling conda-forge --- .appveyor.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 627755ba2a..f6fd120f94 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -20,7 +20,8 @@ environment: install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - - conda config --set auto_update_conda false +# - conda config --set auto_update_conda false + - conda update --yes -n base conda - conda config --add channels conda-forge --force - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 From 2495f1ced2d2d080bb523685502e1fc7b9d62713 Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Fri, 15 Oct 2021 14:11:16 -0600 Subject: [PATCH 03/90] Solves a precision bug in clartg --- SRC/clartg.f90 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index f63a0f8d20..08c1b5e12c 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -187,7 +187,11 @@ subroutine CLARTG( f, g, c, s, r ) d = sqrt( f2 )*sqrt( h2 ) end if p = 1 / d - c = f2*p + if( f2 > safmin * g2 ) then + c = 1 / sqrt( one + g2/f2 ) + else + c = f2*p + end if s = conjg( g )*( f*p ) r = f*( h2*p ) else @@ -224,6 +228,11 @@ subroutine CLARTG( f, g, c, s, r ) d = sqrt( f2 )*sqrt( h2 ) end if p = 1 / d + if( f2 > safmin * g2 ) then + c = (1 / sqrt( one + g2/f2 )) * w + else + c = ( f2*p )*w + end if c = ( f2*p )*w s = conjg( gs )*( fs*p ) r = ( fs*( h2*p ) )*u From b89b15b2f408717812c44214c0b9a09589c8ed06 Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Mon, 18 Oct 2021 09:49:56 -0600 Subject: [PATCH 04/90] Removes one line from clartg --- SRC/clartg.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index 08c1b5e12c..f0327830df 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -233,7 +233,6 @@ subroutine CLARTG( f, g, c, s, r ) else c = ( f2*p )*w end if - c = ( f2*p )*w s = conjg( gs )*( fs*p ) r = ( fs*( h2*p ) )*u end if From ac11f62708b0ef10bbd28fa33d8d29e0e0e34c86 Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Tue, 26 Oct 2021 17:50:27 -0600 Subject: [PATCH 05/90] Several changes to reduce the computation error --- SRC/clartg.f90 | 76 ++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index f0327830df..4e07b29d57 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -129,7 +129,7 @@ subroutine CLARTG( f, g, c, s, r ) complex(wp) f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w complex(wp) :: fs, gs, t ! .. ! .. Intrinsic Functions .. @@ -154,8 +154,7 @@ subroutine CLARTG( f, g, c, s, r ) ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) + d = abs( g ) s = conjg( g ) / d r = d else @@ -163,10 +162,8 @@ subroutine CLARTG( f, g, c, s, r ) ! Use scaled algorithm ! u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) + gs = g / u + d = abs( gs ) s = conjg( gs ) / d r = d*u end if @@ -181,36 +178,40 @@ subroutine CLARTG( f, g, c, s, r ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) - else - d = sqrt( f2 )*sqrt( h2 ) - end if - p = 1 / d if( f2 > safmin * g2 ) then - c = 1 / sqrt( one + g2/f2 ) + d = sqrt( one + g2/f2 ) + c = one / d + if( f2 > rtmin .and. h2 < rtmax ) then + s = conjg( g )*( f / sqrt( f2*h2 ) ) + else + s = conjg( g )*( f /( f2*d ) ) + end if + r = f * d else - c = f2*p + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + c = f2 / d + s = conjg( g )*( f / d ) + r = f*( h2 / d ) end if - s = conjg( g )*( f*p ) - r = f*( h2*p ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 < rtmin * u ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -218,23 +219,30 @@ subroutine CLARTG( f, g, c, s, r ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) - else - d = sqrt( f2 )*sqrt( h2 ) - end if - p = 1 / d if( f2 > safmin * g2 ) then - c = (1 / sqrt( one + g2/f2 )) * w + ! Use a precise algorithm + d = sqrt( w**2 + g2/f2 ) + c = w / d + if( f2 > rtmin .and. h2 < rtmax ) then + s = conjg( gs )*( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs )*( fs / ( f2*d ) ) + end if + r = ( fs * d ) * u else - c = ( f2*p )*w + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) + else + d = sqrt( f2 )*sqrt( h2 ) + end if + c = ( f2 / d )*w + s = conjg( gs )*( fs / d ) + r = ( fs*( h2 / d ) )*u end if - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u end if end if return From 43208822cba931cabc9c34582c964174ea8cfd06 Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Thu, 28 Oct 2021 10:27:14 -0600 Subject: [PATCH 06/90] Starting to modify zlartg --- SRC/clartg.f90 | 1 - SRC/zlartg.f90 | 37 +++++++++++++++---------------------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index 4e07b29d57..7dde0f1d34 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -224,7 +224,6 @@ subroutine CLARTG( f, g, c, s, r ) h2 = f2 + g2 end if if( f2 > safmin * g2 ) then - ! Use a precise algorithm d = sqrt( w**2 + g2/f2 ) c = w / d if( f2 > rtmin .and. h2 < rtmax ) then diff --git a/SRC/zlartg.f90 b/SRC/zlartg.f90 index e509898a1c..6d320217b6 100644 --- a/SRC/zlartg.f90 +++ b/SRC/zlartg.f90 @@ -129,7 +129,7 @@ subroutine ZLARTG( f, g, c, s, r ) complex(wp) f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w complex(wp) :: fs, gs, t ! .. ! .. Intrinsic Functions .. @@ -154,8 +154,7 @@ subroutine ZLARTG( f, g, c, s, r ) ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) + d = abs( g ) s = conjg( g ) / d r = d else @@ -163,10 +162,8 @@ subroutine ZLARTG( f, g, c, s, r ) ! Use scaled algorithm ! u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) + gs = g / u + d = abs( gs ) s = conjg( gs ) / d r = d*u end if @@ -186,27 +183,24 @@ subroutine ZLARTG( f, g, c, s, r ) else d = sqrt( f2 )*sqrt( h2 ) end if - p = 1 / d - c = f2*p - s = conjg( g )*( f*p ) - r = f*( h2*p ) + c = f2 / d + s = conjg( g )*( f / d ) + r = f*( h2 / d ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 < rtmin*u ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -214,7 +208,7 @@ subroutine ZLARTG( f, g, c, s, r ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if @@ -223,10 +217,9 @@ subroutine ZLARTG( f, g, c, s, r ) else d = sqrt( f2 )*sqrt( h2 ) end if - p = 1 / d - c = ( f2*p )*w - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u + c = ( f2 / d )*w + s = conjg( gs )*( fs / d ) + r = ( fs*( h2 / d ) )*u end if end if return From 37a1a1e6896a4037fdb1f287ee643f8a57ce679e Mon Sep 17 00:00:00 2001 From: "weslley.spereira" Date: Tue, 2 Nov 2021 18:49:08 -0600 Subject: [PATCH 07/90] Fix all other Givens rotation routines --- BLAS/SRC/crotg.f90 | 37 ++++++++++++++-------------------- BLAS/SRC/zrotg.f90 | 37 ++++++++++++++-------------------- SRC/clartg.f90 | 49 ++++++++++++---------------------------------- SRC/dlartg.f90 | 20 +++++++++---------- SRC/slartg.f90 | 20 +++++++++---------- SRC/zlartg.f90 | 37 ++++++++++++++-------------------- 6 files changed, 76 insertions(+), 124 deletions(-) diff --git a/BLAS/SRC/crotg.f90 b/BLAS/SRC/crotg.f90 index 7806140668..24f461ef02 100644 --- a/BLAS/SRC/crotg.f90 +++ b/BLAS/SRC/crotg.f90 @@ -122,7 +122,7 @@ subroutine CROTG( a, b, c, s ) complex(wp) :: a, b, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w complex(wp) :: f, fs, g, gs, r, t ! .. ! .. Intrinsic Functions .. @@ -149,8 +149,7 @@ subroutine CROTG( a, b, c, s ) ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) + d = abs( g ) s = conjg( g ) / d r = d else @@ -158,10 +157,8 @@ subroutine CROTG( a, b, c, s ) ! Use scaled algorithm ! u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) + gs = g / u + d = abs( gs ) s = conjg( gs ) / d r = d*u end if @@ -181,27 +178,24 @@ subroutine CROTG( a, b, c, s ) else d = sqrt( f2 )*sqrt( h2 ) end if - p = 1 / d - c = f2*p - s = conjg( g )*( f*p ) - r = f*( h2*p ) + c = f2 / d + s = conjg( g )*( f / d ) + r = f*( h2 / d ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 / u < rtmin ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -209,7 +203,7 @@ subroutine CROTG( a, b, c, s ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if @@ -218,10 +212,9 @@ subroutine CROTG( a, b, c, s ) else d = sqrt( f2 )*sqrt( h2 ) end if - p = 1 / d - c = ( f2*p )*w - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u + c = ( f2 / d )*w + s = conjg( gs )*( fs / d ) + r = ( fs*( h2 / d ) )*u end if end if a = r diff --git a/BLAS/SRC/zrotg.f90 b/BLAS/SRC/zrotg.f90 index 288e5c7ef5..2bd6fba2e5 100644 --- a/BLAS/SRC/zrotg.f90 +++ b/BLAS/SRC/zrotg.f90 @@ -122,7 +122,7 @@ subroutine ZROTG( a, b, c, s ) complex(wp) :: a, b, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w complex(wp) :: f, fs, g, gs, r, t ! .. ! .. Intrinsic Functions .. @@ -149,8 +149,7 @@ subroutine ZROTG( a, b, c, s ) ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) + d = abs( g ) s = conjg( g ) / d r = d else @@ -158,10 +157,8 @@ subroutine ZROTG( a, b, c, s ) ! Use scaled algorithm ! u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) + gs = g / u + d = abs( gs ) s = conjg( gs ) / d r = d*u end if @@ -181,27 +178,24 @@ subroutine ZROTG( a, b, c, s ) else d = sqrt( f2 )*sqrt( h2 ) end if - p = 1 / d - c = f2*p - s = conjg( g )*( f*p ) - r = f*( h2*p ) + c = f2 / d + s = conjg( g )*( f / d ) + r = f*( h2 / d ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 / u < rtmin ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -209,7 +203,7 @@ subroutine ZROTG( a, b, c, s ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if @@ -218,10 +212,9 @@ subroutine ZROTG( a, b, c, s ) else d = sqrt( f2 )*sqrt( h2 ) end if - p = 1 / d - c = ( f2*p )*w - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u + c = ( f2 / d )*w + s = conjg( gs )*( fs / d ) + r = ( fs*( h2 / d ) )*u end if end if a = r diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index 4e07b29d57..4392e70e9e 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -178,25 +178,14 @@ subroutine CLARTG( f, g, c, s, r ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > safmin * g2 ) then - d = sqrt( one + g2/f2 ) - c = one / d - if( f2 > rtmin .and. h2 < rtmax ) then - s = conjg( g )*( f / sqrt( f2*h2 ) ) - else - s = conjg( g )*( f /( f2*d ) ) - end if - r = f * d + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) else - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) - else - d = sqrt( f2 )*sqrt( h2 ) - end if - c = f2 / d - s = conjg( g )*( f / d ) - r = f*( h2 / d ) + d = sqrt( f2 )*sqrt( h2 ) end if + c = f2 / d + s = conjg( g )*( f / d ) + r = f*( h2 / d ) else ! ! Use scaled algorithm @@ -204,7 +193,7 @@ subroutine CLARTG( f, g, c, s, r ) u = min( safmax, max( safmin, f1, g1 ) ) gs = g / u g2 = ABSSQ( gs ) - if( f1 < rtmin * u ) then + if( f1 / u < rtmin ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. @@ -223,26 +212,14 @@ subroutine CLARTG( f, g, c, s, r ) f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > safmin * g2 ) then - ! Use a precise algorithm - d = sqrt( w**2 + g2/f2 ) - c = w / d - if( f2 > rtmin .and. h2 < rtmax ) then - s = conjg( gs )*( fs / sqrt( f2*h2 ) ) - else - s = conjg( gs )*( fs / ( f2*d ) ) - end if - r = ( fs * d ) * u + if( f2 > rtmin .and. h2 < rtmax ) then + d = sqrt( f2*h2 ) else - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) - else - d = sqrt( f2 )*sqrt( h2 ) - end if - c = ( f2 / d )*w - s = conjg( gs )*( fs / d ) - r = ( fs*( h2 / d ) )*u + d = sqrt( f2 )*sqrt( h2 ) end if + c = ( f2 / d )*w + s = conjg( gs )*( fs / d ) + r = ( fs*( h2 / d ) )*u end if end if return diff --git a/SRC/dlartg.f90 b/SRC/dlartg.f90 index 03a708f863..365fa7207d 100644 --- a/SRC/dlartg.f90 +++ b/SRC/dlartg.f90 @@ -123,7 +123,7 @@ subroutine DLARTG( f, g, c, s, r ) real(wp) :: c, f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, fs, g1, gs, p, u, uu + real(wp) :: d, f1, fs, g1, gs, u ! .. ! .. Intrinsic Functions .. intrinsic :: abs, sign, sqrt @@ -143,20 +143,18 @@ subroutine DLARTG( f, g, c, s, r ) else if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then d = sqrt( f*f + g*g ) - p = one / d - c = f1*p - s = g*sign( p, f ) + c = f1 / d r = sign( d, f ) + s = g / r else u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - fs = f*uu - gs = g*uu + fs = f / u + gs = g / u d = sqrt( fs*fs + gs*gs ) - p = one / d - c = abs( fs )*p - s = gs*sign( p, f ) - r = sign( d, f )*u + c = abs( fs ) / d + r = sign( d, f ) + s = gs / r + r = r * u end if return end subroutine diff --git a/SRC/slartg.f90 b/SRC/slartg.f90 index 2a936a919f..0ef73ddc2c 100644 --- a/SRC/slartg.f90 +++ b/SRC/slartg.f90 @@ -123,7 +123,7 @@ subroutine SLARTG( f, g, c, s, r ) real(wp) :: c, f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, fs, g1, gs, p, u, uu + real(wp) :: d, f1, fs, g1, gs, u ! .. ! .. Intrinsic Functions .. intrinsic :: abs, sign, sqrt @@ -143,20 +143,18 @@ subroutine SLARTG( f, g, c, s, r ) else if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then d = sqrt( f*f + g*g ) - p = one / d - c = f1*p - s = g*sign( p, f ) + c = f1 / d r = sign( d, f ) + s = g / r else u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - fs = f*uu - gs = g*uu + fs = f / u + gs = g / u d = sqrt( fs*fs + gs*gs ) - p = one / d - c = abs( fs )*p - s = gs*sign( p, f ) - r = sign( d, f )*u + c = abs( fs ) / d + r = sign( d, f ) + s = gs / r + r = r*u end if return end subroutine diff --git a/SRC/zlartg.f90 b/SRC/zlartg.f90 index e509898a1c..2bdb8fcc85 100644 --- a/SRC/zlartg.f90 +++ b/SRC/zlartg.f90 @@ -129,7 +129,7 @@ subroutine ZLARTG( f, g, c, s, r ) complex(wp) f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w complex(wp) :: fs, gs, t ! .. ! .. Intrinsic Functions .. @@ -154,8 +154,7 @@ subroutine ZLARTG( f, g, c, s, r ) ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) + d = abs( g ) s = conjg( g ) / d r = d else @@ -163,10 +162,8 @@ subroutine ZLARTG( f, g, c, s, r ) ! Use scaled algorithm ! u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) + gs = g / u + d = abs( gs ) s = conjg( gs ) / d r = d*u end if @@ -186,27 +183,24 @@ subroutine ZLARTG( f, g, c, s, r ) else d = sqrt( f2 )*sqrt( h2 ) end if - p = 1 / d - c = f2*p - s = conjg( g )*( f*p ) - r = f*( h2*p ) + c = f2 / d + s = conjg( g )*( f / d ) + r = f*( h2 / d ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 / u < rtmin ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -214,7 +208,7 @@ subroutine ZLARTG( f, g, c, s, r ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if @@ -223,10 +217,9 @@ subroutine ZLARTG( f, g, c, s, r ) else d = sqrt( f2 )*sqrt( h2 ) end if - p = 1 / d - c = ( f2*p )*w - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u + c = ( f2 / d )*w + s = conjg( gs )*( fs / d ) + r = ( fs*( h2 / d ) )*u end if end if return From 2904d8763e83ea4c7ca8bc4b16740a8ccf5527e2 Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Mon, 22 Nov 2021 18:11:04 -0700 Subject: [PATCH 08/90] Algorithm precise and with no bias in the error --- SRC/clartg.f90 | 80 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index 4392e70e9e..741e86af97 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -117,7 +117,7 @@ subroutine CLARTG( f, g, c, s, r ) use LA_CONSTANTS, & only: wp=>sp, zero=>szero, one=>sone, two=>stwo, czero, & - rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax + safmin=>ssafmin, safmax=>ssafmax ! ! -- LAPACK auxiliary routine (version 3.10.0) -- ! -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -129,7 +129,7 @@ subroutine CLARTG( f, g, c, s, r ) complex(wp) f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, u, v, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax complex(wp) :: fs, gs, t ! .. ! .. Intrinsic Functions .. @@ -141,6 +141,9 @@ subroutine CLARTG( f, g, c, s, r ) ! .. Statement Function definitions .. ABSSQ( t ) = real( t )**2 + aimag( t )**2 ! .. +! .. Constants .. + rtmin = sqrt( safmin ) +! .. ! .. Executable Statements .. ! if( g == czero ) then @@ -150,6 +153,7 @@ subroutine CLARTG( f, g, c, s, r ) else if( f == czero ) then c = zero g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm @@ -170,6 +174,7 @@ subroutine CLARTG( f, g, c, s, r ) else f1 = max( abs(real(f)), abs(aimag(f)) ) g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/4 ) if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then ! @@ -178,14 +183,36 @@ subroutine CLARTG( f, g, c, s, r ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = f / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( g ) * ( f / sqrt( f2*h2 ) ) + else + s = conjg( g ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = f / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = f * ( h2 / d ) + end if + s = conjg( g ) * ( f / d ) end if - c = f2 / d - s = conjg( g )*( f / d ) - r = f*( h2 / d ) else ! ! Use scaled algorithm @@ -212,14 +239,39 @@ subroutine CLARTG( f, g, c, s, r ) f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = fs / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = fs / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = fs * ( h2 / d ) + end if + s = conjg( gs ) * ( fs / d ) end if - c = ( f2 / d )*w - s = conjg( gs )*( fs / d ) - r = ( fs*( h2 / d ) )*u + ! Rescale c and r + c = c * w + r = r * u end if end if return From 95b6e84b03a95618cb12939118b934a7ebcc1ded Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Fri, 10 Dec 2021 15:43:43 -0700 Subject: [PATCH 09/90] Updates Givens rotations with preciser algorithms --- BLAS/SRC/crotg.f90 | 103 +++++++++++++++++++++++++++++------------- BLAS/SRC/zrotg.f90 | 103 +++++++++++++++++++++++++++++------------- SRC/clartg.f90 | 26 +++++------ SRC/dlartg.f90 | 16 ++++--- SRC/slartg.f90 | 12 ++--- SRC/zlartg.f90 | 108 +++++++++++++++++++++++++++++++++------------ 6 files changed, 251 insertions(+), 117 deletions(-) diff --git a/BLAS/SRC/crotg.f90 b/BLAS/SRC/crotg.f90 index 24f461ef02..c7e49e18b5 100644 --- a/BLAS/SRC/crotg.f90 +++ b/BLAS/SRC/crotg.f90 @@ -1,4 +1,4 @@ -!> \brief \b CROTG +!> \brief \b CROTG generates a Givens rotation with real cosine and complex sine. ! ! =========== DOCUMENTATION =========== ! @@ -24,12 +24,12 @@ !> = 1 if x = 0 !> c = |a| / sqrt(|a|**2 + |b|**2) !> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2) -!> When a and b are real and r /= 0, the formulas simplify to !> r = sgn(a)*sqrt(|a|**2 + |b|**2) +!> When a and b are real and r /= 0, the formulas simplify to !> c = a / r !> s = b / r -!> the same as in CROTG when |a| > |b|. When |b| >= |a|, the -!> sign of c and s will be different from those computed by CROTG +!> the same as in SROTG when |a| > |b|. When |b| >= |a|, the +!> sign of c and s will be different from those computed by SROTG !> if the signs of a and b are not the same. !> !> \endverbatim @@ -65,20 +65,19 @@ ! Authors: ! ======== ! -!> \author Edward Anderson, Lockheed Martin +!> \author Weslley Pereira, University of Colorado Denver, USA ! -!> \par Contributors: -! ================== -!> -!> Weslley Pereira, University of Colorado Denver, USA +!> \date December 2021 ! -!> \ingroup single_blas_level1 +!> \ingroup OTHERauxiliary ! !> \par Further Details: ! ===================== !> !> \verbatim !> +!> Based on the algorithm from +!> !> Anderson E. (2017) !> Algorithm 978: Safe Scaling in the Level 1 BLAS !> ACM Trans Math Softw 44:1--28 @@ -108,21 +107,14 @@ subroutine CROTG( a, b, c, s ) 1-minexponent(0._wp), & maxexponent(0._wp)-1 & ) - real(wp), parameter :: rtmin = sqrt( real(radix(0._wp),wp)**max( & - minexponent(0._wp)-1, & - 1-maxexponent(0._wp) & - ) / epsilon(0._wp) ) - real(wp), parameter :: rtmax = sqrt( real(radix(0._wp),wp)**max( & - 1-minexponent(0._wp), & - maxexponent(0._wp)-1 & - ) * epsilon(0._wp) ) + real(wp), parameter :: rtmin = sqrt( safmin ) ! .. ! .. Scalar Arguments .. real(wp) :: c complex(wp) :: a, b, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, u, v, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax complex(wp) :: f, fs, g, gs, r, t ! .. ! .. Intrinsic Functions .. @@ -145,6 +137,7 @@ subroutine CROTG( a, b, c, s ) else if( f == czero ) then c = zero g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm @@ -165,6 +158,7 @@ subroutine CROTG( a, b, c, s ) else f1 = max( abs(real(f)), abs(aimag(f)) ) g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/4 ) if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then ! @@ -173,14 +167,36 @@ subroutine CROTG( a, b, c, s ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = f / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( g ) * ( f / sqrt( f2*h2 ) ) + else + s = conjg( g ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = f / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = f * ( h2 / d ) + end if + s = conjg( g ) * ( f / d ) end if - c = f2 / d - s = conjg( g )*( f / d ) - r = f*( h2 / d ) else ! ! Use scaled algorithm @@ -207,14 +223,39 @@ subroutine CROTG( a, b, c, s ) f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = fs / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = fs / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = fs * ( h2 / d ) + end if + s = conjg( gs ) * ( fs / d ) end if - c = ( f2 / d )*w - s = conjg( gs )*( fs / d ) - r = ( fs*( h2 / d ) )*u + ! Rescale c and r + c = c * w + r = r * u end if end if a = r diff --git a/BLAS/SRC/zrotg.f90 b/BLAS/SRC/zrotg.f90 index 2bd6fba2e5..37aca1e757 100644 --- a/BLAS/SRC/zrotg.f90 +++ b/BLAS/SRC/zrotg.f90 @@ -1,4 +1,4 @@ -!> \brief \b ZROTG +!> \brief \b ZROTG generates a Givens rotation with real cosine and complex sine. ! ! =========== DOCUMENTATION =========== ! @@ -24,12 +24,12 @@ !> = 1 if x = 0 !> c = |a| / sqrt(|a|**2 + |b|**2) !> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2) -!> When a and b are real and r /= 0, the formulas simplify to !> r = sgn(a)*sqrt(|a|**2 + |b|**2) +!> When a and b are real and r /= 0, the formulas simplify to !> c = a / r !> s = b / r -!> the same as in ZROTG when |a| > |b|. When |b| >= |a|, the -!> sign of c and s will be different from those computed by ZROTG +!> the same as in DROTG when |a| > |b|. When |b| >= |a|, the +!> sign of c and s will be different from those computed by DROTG !> if the signs of a and b are not the same. !> !> \endverbatim @@ -65,20 +65,19 @@ ! Authors: ! ======== ! -!> \author Edward Anderson, Lockheed Martin +!> \author Weslley Pereira, University of Colorado Denver, USA ! -!> \par Contributors: -! ================== -!> -!> Weslley Pereira, University of Colorado Denver, USA +!> \date December 2021 ! -!> \ingroup single_blas_level1 +!> \ingroup OTHERauxiliary ! !> \par Further Details: ! ===================== !> !> \verbatim !> +!> Based on the algorithm from +!> !> Anderson E. (2017) !> Algorithm 978: Safe Scaling in the Level 1 BLAS !> ACM Trans Math Softw 44:1--28 @@ -108,21 +107,14 @@ subroutine ZROTG( a, b, c, s ) 1-minexponent(0._wp), & maxexponent(0._wp)-1 & ) - real(wp), parameter :: rtmin = sqrt( real(radix(0._wp),wp)**max( & - minexponent(0._wp)-1, & - 1-maxexponent(0._wp) & - ) / epsilon(0._wp) ) - real(wp), parameter :: rtmax = sqrt( real(radix(0._wp),wp)**max( & - 1-minexponent(0._wp), & - maxexponent(0._wp)-1 & - ) * epsilon(0._wp) ) + real(wp), parameter :: rtmin = sqrt( safmin ) ! .. ! .. Scalar Arguments .. real(wp) :: c complex(wp) :: a, b, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, u, v, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax complex(wp) :: f, fs, g, gs, r, t ! .. ! .. Intrinsic Functions .. @@ -145,6 +137,7 @@ subroutine ZROTG( a, b, c, s ) else if( f == czero ) then c = zero g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm @@ -165,6 +158,7 @@ subroutine ZROTG( a, b, c, s ) else f1 = max( abs(real(f)), abs(aimag(f)) ) g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/4 ) if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then ! @@ -173,14 +167,36 @@ subroutine ZROTG( a, b, c, s ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = f / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( g ) * ( f / sqrt( f2*h2 ) ) + else + s = conjg( g ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = f / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = f * ( h2 / d ) + end if + s = conjg( g ) * ( f / d ) end if - c = f2 / d - s = conjg( g )*( f / d ) - r = f*( h2 / d ) else ! ! Use scaled algorithm @@ -207,14 +223,39 @@ subroutine ZROTG( a, b, c, s ) f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = fs / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = fs / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = fs * ( h2 / d ) + end if + s = conjg( gs ) * ( fs / d ) end if - c = ( f2 / d )*w - s = conjg( gs )*( fs / d ) - r = ( fs*( h2 / d ) )*u + ! Rescale c and r + c = c * w + r = r * u end if end if a = r diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index 741e86af97..b1ecf5755a 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -30,7 +30,7 @@ !> The mathematical formulas used for C and S are !> !> sgn(x) = { x / |x|, x != 0 -!> { 1, x = 0 +!> { 1, x = 0 !> !> R = sgn(F) * sqrt(|F|**2 + |G|**2) !> @@ -38,19 +38,20 @@ !> !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) !> +!> Special conditions: +!> If G=0, then C=1 and S=0. +!> If F=0, then C=0 and S is chosen so that R is real. +!> !> When F and G are real, the formulas simplify to C = F/R and !> S = G/R, and the returned values of C, S, and R should be -!> identical to those returned by CLARTG. +!> identical to those returned by SLARTG. !> !> The algorithm used to compute these quantities incorporates scaling !> to avoid overflow or underflow in computing the square root of the !> sum of squares. !> -!> This is a faster version of the BLAS1 routine CROTG, except for -!> the following differences: -!> F and G are unchanged on return. -!> If G=0, then C=1 and S=0. -!> If F=0, then C=0 and S is chosen so that R is real. +!> This is the same routine CROTG fom BLAS1, except that +!> F and G are unchanged on return. !> !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. !> \endverbatim @@ -91,22 +92,19 @@ ! Authors: ! ======== ! -!> \author Edward Anderson, Lockheed Martin +!> \author Weslley Pereira, University of Colorado Denver, USA ! -!> \date August 2016 +!> \date December 2021 ! !> \ingroup OTHERauxiliary ! -!> \par Contributors: -! ================== -!> -!> Weslley Pereira, University of Colorado Denver, USA -! !> \par Further Details: ! ===================== !> !> \verbatim !> +!> Based on the algorithm from +!> !> Anderson E. (2017) !> Algorithm 978: Safe Scaling in the Level 1 BLAS !> ACM Trans Math Softw 44:1--28 diff --git a/SRC/dlartg.f90 b/SRC/dlartg.f90 index 365fa7207d..ea05ad3c6e 100644 --- a/SRC/dlartg.f90 +++ b/SRC/dlartg.f90 @@ -11,7 +11,7 @@ ! SUBROUTINE DLARTG( F, G, C, S, R ) ! ! .. Scalar Arguments .. -! REAL(wp) C, F, G, R, S +! REAL(wp) C, F, G, R, S ! .. ! !> \par Purpose: @@ -37,7 +37,7 @@ !> This version is discontinuous in R at F = 0 but it returns the same !> C and S as ZLARTG for complex inputs (F,0) and (G,0). !> -!> This is a more accurate version of the BLAS1 routine DROTG, +!> This is a more accurate version of the BLAS1 routine SROTG, !> with the following other differences: !> F and G are unchanged on return. !> If G=0, then C=1 and S=0. @@ -45,8 +45,6 @@ !> floating point operations (saves work in DBDSQR when !> there are zeros on the diagonal). !> -!> If F exceeds G in magnitude, C will be positive. -!> !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. !> \endverbatim ! @@ -112,7 +110,7 @@ subroutine DLARTG( f, g, c, s, r ) use LA_CONSTANTS, & only: wp=>dp, zero=>dzero, half=>dhalf, one=>done, & - rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax + safmin=>dsafmin, safmax=>dsafmax ! ! -- LAPACK auxiliary routine (version 3.10.0) -- ! -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -123,11 +121,15 @@ subroutine DLARTG( f, g, c, s, r ) real(wp) :: c, f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, fs, g1, gs, u + real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax ! .. ! .. Intrinsic Functions .. intrinsic :: abs, sign, sqrt ! .. +! .. Constants .. + rtmin = sqrt( safmin ) + rtmax = sqrt( safmax/2 ) +! .. ! .. Executable Statements .. ! f1 = abs( f ) @@ -154,7 +156,7 @@ subroutine DLARTG( f, g, c, s, r ) c = abs( fs ) / d r = sign( d, f ) s = gs / r - r = r * u + r = r*u end if return end subroutine diff --git a/SRC/slartg.f90 b/SRC/slartg.f90 index 0ef73ddc2c..c445e951c9 100644 --- a/SRC/slartg.f90 +++ b/SRC/slartg.f90 @@ -35,7 +35,7 @@ !> square root of the sum of squares. !> !> This version is discontinuous in R at F = 0 but it returns the same -!> C and S as SLARTG for complex inputs (F,0) and (G,0). +!> C and S as CLARTG for complex inputs (F,0) and (G,0). !> !> This is a more accurate version of the BLAS1 routine SROTG, !> with the following other differences: @@ -45,8 +45,6 @@ !> floating point operations (saves work in SBDSQR when !> there are zeros on the diagonal). !> -!> If F exceeds G in magnitude, C will be positive. -!> !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. !> \endverbatim ! @@ -112,7 +110,7 @@ subroutine SLARTG( f, g, c, s, r ) use LA_CONSTANTS, & only: wp=>sp, zero=>szero, half=>shalf, one=>sone, & - rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax + safmin=>ssafmin, safmax=>ssafmax ! ! -- LAPACK auxiliary routine (version 3.10.0) -- ! -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -123,11 +121,15 @@ subroutine SLARTG( f, g, c, s, r ) real(wp) :: c, f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, fs, g1, gs, u + real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax ! .. ! .. Intrinsic Functions .. intrinsic :: abs, sign, sqrt ! .. +! .. Constants .. + rtmin = sqrt( safmin ) + rtmax = sqrt( safmax/2 ) +! .. ! .. Executable Statements .. ! f1 = abs( f ) diff --git a/SRC/zlartg.f90 b/SRC/zlartg.f90 index 2bdb8fcc85..047929747b 100644 --- a/SRC/zlartg.f90 +++ b/SRC/zlartg.f90 @@ -11,8 +11,8 @@ ! SUBROUTINE ZLARTG( F, G, C, S, R ) ! ! .. Scalar Arguments .. -! REAL(wp) C -! COMPLEX(wp) F, G, R, S +! REAL(wp) C +! COMPLEX(wp) F, G, R, S ! .. ! !> \par Purpose: @@ -30,7 +30,7 @@ !> The mathematical formulas used for C and S are !> !> sgn(x) = { x / |x|, x != 0 -!> { 1, x = 0 +!> { 1, x = 0 !> !> R = sgn(F) * sqrt(|F|**2 + |G|**2) !> @@ -38,6 +38,10 @@ !> !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) !> +!> Special conditions: +!> If G=0, then C=1 and S=0. +!> If F=0, then C=0 and S is chosen so that R is real. +!> !> When F and G are real, the formulas simplify to C = F/R and !> S = G/R, and the returned values of C, S, and R should be !> identical to those returned by DLARTG. @@ -46,11 +50,8 @@ !> to avoid overflow or underflow in computing the square root of the !> sum of squares. !> -!> This is a faster version of the BLAS1 routine ZROTG, except for -!> the following differences: -!> F and G are unchanged on return. -!> If G=0, then C=1 and S=0. -!> If F=0, then C=0 and S is chosen so that R is real. +!> This is the same routine CROTG fom BLAS1, except that +!> F and G are unchanged on return. !> !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. !> \endverbatim @@ -91,22 +92,19 @@ ! Authors: ! ======== ! -!> \author Edward Anderson, Lockheed Martin +!> \author Weslley Pereira, University of Colorado Denver, USA ! -!> \date August 2016 +!> \date December 2021 ! !> \ingroup OTHERauxiliary ! -!> \par Contributors: -! ================== -!> -!> Weslley Pereira, University of Colorado Denver, USA -! !> \par Further Details: ! ===================== !> !> \verbatim !> +!> Based on the algorithm from +!> !> Anderson E. (2017) !> Algorithm 978: Safe Scaling in the Level 1 BLAS !> ACM Trans Math Softw 44:1--28 @@ -117,7 +115,7 @@ subroutine ZLARTG( f, g, c, s, r ) use LA_CONSTANTS, & only: wp=>dp, zero=>dzero, one=>done, two=>dtwo, czero=>zzero, & - rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax + safmin=>dsafmin, safmax=>dsafmax ! ! -- LAPACK auxiliary routine (version 3.10.0) -- ! -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -129,7 +127,7 @@ subroutine ZLARTG( f, g, c, s, r ) complex(wp) f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, u, v, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax complex(wp) :: fs, gs, t ! .. ! .. Intrinsic Functions .. @@ -141,6 +139,9 @@ subroutine ZLARTG( f, g, c, s, r ) ! .. Statement Function definitions .. ABSSQ( t ) = real( t )**2 + aimag( t )**2 ! .. +! .. Constants .. + rtmin = sqrt( safmin ) +! .. ! .. Executable Statements .. ! if( g == czero ) then @@ -150,6 +151,7 @@ subroutine ZLARTG( f, g, c, s, r ) else if( f == czero ) then c = zero g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm @@ -170,6 +172,7 @@ subroutine ZLARTG( f, g, c, s, r ) else f1 = max( abs(real(f)), abs(aimag(f)) ) g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/4 ) if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then ! @@ -178,14 +181,36 @@ subroutine ZLARTG( f, g, c, s, r ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = f / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( g ) * ( f / sqrt( f2*h2 ) ) + else + s = conjg( g ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = f / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = f * ( h2 / d ) + end if + s = conjg( g ) * ( f / d ) end if - c = f2 / d - s = conjg( g )*( f / d ) - r = f*( h2 / d ) else ! ! Use scaled algorithm @@ -212,14 +237,39 @@ subroutine ZLARTG( f, g, c, s, r ) f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = fs / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = fs / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = fs * ( h2 / d ) + end if + s = conjg( gs ) * ( fs / d ) end if - c = ( f2 / d )*w - s = conjg( gs )*( fs / d ) - r = ( fs*( h2 / d ) )*u + ! Rescale c and r + c = c * w + r = r * u end if end if return From cdc8f33194e2e52b5bb54959da81893ea2bc1754 Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Mon, 13 Dec 2021 09:06:04 -0700 Subject: [PATCH 10/90] Fix documentation thanks to @vladimir-ch --- SRC/dlartg.f90 | 2 +- SRC/zlartg.f90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/SRC/dlartg.f90 b/SRC/dlartg.f90 index ea05ad3c6e..0c5f1e1b95 100644 --- a/SRC/dlartg.f90 +++ b/SRC/dlartg.f90 @@ -37,7 +37,7 @@ !> This version is discontinuous in R at F = 0 but it returns the same !> C and S as ZLARTG for complex inputs (F,0) and (G,0). !> -!> This is a more accurate version of the BLAS1 routine SROTG, +!> This is a more accurate version of the BLAS1 routine DROTG, !> with the following other differences: !> F and G are unchanged on return. !> If G=0, then C=1 and S=0. diff --git a/SRC/zlartg.f90 b/SRC/zlartg.f90 index 047929747b..d1333e9265 100644 --- a/SRC/zlartg.f90 +++ b/SRC/zlartg.f90 @@ -50,7 +50,7 @@ !> to avoid overflow or underflow in computing the square root of the !> sum of squares. !> -!> This is the same routine CROTG fom BLAS1, except that +!> This is the same routine ZROTG fom BLAS1, except that !> F and G are unchanged on return. !> !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. From c362fff1eee80fdd88b6fd60be3fb9d045cb08bb Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Mon, 13 Dec 2021 13:23:06 -0700 Subject: [PATCH 11/90] Minor changes --- BLAS/SRC/crotg.f90 | 44 +++++++++++++++++++++++++++++--------------- BLAS/SRC/zrotg.f90 | 44 +++++++++++++++++++++++++++++--------------- SRC/clartg.f90 | 42 ++++++++++++++++++++++++++++-------------- SRC/zlartg.f90 | 42 ++++++++++++++++++++++++++++-------------- 4 files changed, 114 insertions(+), 58 deletions(-) diff --git a/BLAS/SRC/crotg.f90 b/BLAS/SRC/crotg.f90 index c7e49e18b5..0fb7bb09f8 100644 --- a/BLAS/SRC/crotg.f90 +++ b/BLAS/SRC/crotg.f90 @@ -69,7 +69,7 @@ ! !> \date December 2021 ! -!> \ingroup OTHERauxiliary +!> \ingroup single_blas_level1 ! !> \par Further Details: ! ===================== @@ -136,24 +136,38 @@ subroutine CROTG( a, b, c, s ) r = f else if( f == czero ) then c = zero - g1 = max( abs(real(g)), abs(aimag(g)) ) - rtmax = sqrt( safmax/2 ) - if( g1 > rtmin .and. g1 < rtmax ) then + if( real(g) == zero ) then + r = abs(aimag(g)) + s = conjg( g ) / r + elseif( aimag(g) == zero ) then + r = abs(real(g)) + s = conjg( g ) / r + else + g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) + if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm ! - d = abs( g ) - s = conjg( g ) / d - r = d - else +! The following two lines can be replaced by `d = abs( g )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else ! ! Use scaled algorithm ! - u = min( safmax, max( safmin, g1 ) ) - gs = g / u - d = abs( gs ) - s = conjg( gs ) / d - r = d*u + u = min( safmax, max( safmin, g1 ) ) + gs = g / u +! The following two lines can be replaced by `d = abs( gs )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if end if else f1 = max( abs(real(f)), abs(aimag(f)) ) @@ -192,7 +206,7 @@ subroutine CROTG( a, b, c, s ) r = f / c else ! f2 / sqrt(f2 * h2) < safmin, then - ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax r = f * ( h2 / d ) end if s = conjg( g ) * ( f / d ) @@ -248,7 +262,7 @@ subroutine CROTG( a, b, c, s ) r = fs / c else ! f2 / sqrt(f2 * h2) < safmin, then - ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax r = fs * ( h2 / d ) end if s = conjg( gs ) * ( fs / d ) diff --git a/BLAS/SRC/zrotg.f90 b/BLAS/SRC/zrotg.f90 index 37aca1e757..bea4c278ff 100644 --- a/BLAS/SRC/zrotg.f90 +++ b/BLAS/SRC/zrotg.f90 @@ -69,7 +69,7 @@ ! !> \date December 2021 ! -!> \ingroup OTHERauxiliary +!> \ingroup single_blas_level1 ! !> \par Further Details: ! ===================== @@ -136,24 +136,38 @@ subroutine ZROTG( a, b, c, s ) r = f else if( f == czero ) then c = zero - g1 = max( abs(real(g)), abs(aimag(g)) ) - rtmax = sqrt( safmax/2 ) - if( g1 > rtmin .and. g1 < rtmax ) then + if( real(g) == zero ) then + r = abs(aimag(g)) + s = conjg( g ) / r + elseif( aimag(g) == zero ) then + r = abs(real(g)) + s = conjg( g ) / r + else + g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) + if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm ! - d = abs( g ) - s = conjg( g ) / d - r = d - else +! The following two lines can be replaced by `d = abs( g )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else ! ! Use scaled algorithm ! - u = min( safmax, max( safmin, g1 ) ) - gs = g / u - d = abs( gs ) - s = conjg( gs ) / d - r = d*u + u = min( safmax, max( safmin, g1 ) ) + gs = g / u +! The following two lines can be replaced by `d = abs( gs )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if end if else f1 = max( abs(real(f)), abs(aimag(f)) ) @@ -192,7 +206,7 @@ subroutine ZROTG( a, b, c, s ) r = f / c else ! f2 / sqrt(f2 * h2) < safmin, then - ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax r = f * ( h2 / d ) end if s = conjg( g ) * ( f / d ) @@ -248,7 +262,7 @@ subroutine ZROTG( a, b, c, s ) r = fs / c else ! f2 / sqrt(f2 * h2) < safmin, then - ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax r = fs * ( h2 / d ) end if s = conjg( gs ) * ( fs / d ) diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index b1ecf5755a..0ab8b8e089 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -150,24 +150,38 @@ subroutine CLARTG( f, g, c, s, r ) r = f else if( f == czero ) then c = zero - g1 = max( abs(real(g)), abs(aimag(g)) ) - rtmax = sqrt( safmax/2 ) - if( g1 > rtmin .and. g1 < rtmax ) then + if( real(g) == zero ) then + r = abs(aimag(g)) + s = conjg( g ) / r + elseif( aimag(g) == zero ) then + r = abs(real(g)) + s = conjg( g ) / r + else + g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) + if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm ! - d = abs( g ) - s = conjg( g ) / d - r = d - else +! The following two lines can be replaced by `d = abs( g )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else ! ! Use scaled algorithm ! - u = min( safmax, max( safmin, g1 ) ) - gs = g / u - d = abs( gs ) - s = conjg( gs ) / d - r = d*u + u = min( safmax, max( safmin, g1 ) ) + gs = g / u +! The following two lines can be replaced by `d = abs( gs )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if end if else f1 = max( abs(real(f)), abs(aimag(f)) ) @@ -206,7 +220,7 @@ subroutine CLARTG( f, g, c, s, r ) r = f / c else ! f2 / sqrt(f2 * h2) < safmin, then - ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax r = f * ( h2 / d ) end if s = conjg( g ) * ( f / d ) @@ -262,7 +276,7 @@ subroutine CLARTG( f, g, c, s, r ) r = fs / c else ! f2 / sqrt(f2 * h2) < safmin, then - ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax r = fs * ( h2 / d ) end if s = conjg( gs ) * ( fs / d ) diff --git a/SRC/zlartg.f90 b/SRC/zlartg.f90 index d1333e9265..289e2cf1ae 100644 --- a/SRC/zlartg.f90 +++ b/SRC/zlartg.f90 @@ -150,24 +150,38 @@ subroutine ZLARTG( f, g, c, s, r ) r = f else if( f == czero ) then c = zero - g1 = max( abs(real(g)), abs(aimag(g)) ) - rtmax = sqrt( safmax/2 ) - if( g1 > rtmin .and. g1 < rtmax ) then + if( real(g) == zero ) then + r = abs(aimag(g)) + s = conjg( g ) / r + elseif( aimag(g) == zero ) then + r = abs(real(g)) + s = conjg( g ) / r + else + g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) + if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm ! - d = abs( g ) - s = conjg( g ) / d - r = d - else +! The following two lines can be replaced by `d = abs( g )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else ! ! Use scaled algorithm ! - u = min( safmax, max( safmin, g1 ) ) - gs = g / u - d = abs( gs ) - s = conjg( gs ) / d - r = d*u + u = min( safmax, max( safmin, g1 ) ) + gs = g / u +! The following two lines can be replaced by `d = abs( gs )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if end if else f1 = max( abs(real(f)), abs(aimag(f)) ) @@ -206,7 +220,7 @@ subroutine ZLARTG( f, g, c, s, r ) r = f / c else ! f2 / sqrt(f2 * h2) < safmin, then - ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax r = f * ( h2 / d ) end if s = conjg( g ) * ( f / d ) @@ -262,7 +276,7 @@ subroutine ZLARTG( f, g, c, s, r ) r = fs / c else ! f2 / sqrt(f2 * h2) < safmin, then - ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax r = fs * ( h2 / d ) end if s = conjg( gs ) * ( fs / d ) From bcdff4425491f02e1d4c65219ff41497595a63b6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Dec 2021 19:32:49 +0100 Subject: [PATCH 12/90] try to work around failed install of charset-normalizer --- .appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.appveyor.yml b/.appveyor.yml index f6fd120f94..af886703b3 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -21,6 +21,7 @@ environment: install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false + - conda clean --packages - conda update --yes -n base conda - conda config --add channels conda-forge --force - conda install --yes --quiet flang jom From bb66bc22806775a61f3f4ddb3ab3e1c4c01f0810 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Dec 2021 19:34:37 +0100 Subject: [PATCH 13/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index af886703b3..a03b7e9d2e 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -21,7 +21,7 @@ environment: install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false - - conda clean --packages + - conda clean --yes --packages - conda update --yes -n base conda - conda config --add channels conda-forge --force - conda install --yes --quiet flang jom From eb5c151e10f1a9c6dccc6c3daadb584c7fb51d56 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Dec 2021 21:27:11 +0100 Subject: [PATCH 14/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index a03b7e9d2e..2d673d8c09 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -22,8 +22,8 @@ install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false - conda clean --yes --packages - - conda update --yes -n base conda - conda config --add channels conda-forge --force + - conda update --yes -n base conda - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" From 56b6ebf71a5deb4a1dec3778eec12e073b91ac57 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Dec 2021 21:54:46 +0100 Subject: [PATCH 15/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 2d673d8c09..bb3fdbf665 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -21,8 +21,8 @@ environment: install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false - - conda clean --yes --packages - conda config --add channels conda-forge --force + - conda install --yes charset-normalizer --force - conda update --yes -n base conda - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 From 75082fa032802ba94d31018210d2c3d8493e33d2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Dec 2021 22:18:45 +0100 Subject: [PATCH 16/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index bb3fdbf665..5db2df56de 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -20,7 +20,7 @@ environment: install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat -# - conda config --set auto_update_conda false + - conda config --set auto_update_conda false - conda config --add channels conda-forge --force - conda install --yes charset-normalizer --force - conda update --yes -n base conda From ed834d46135ae166b62b26345123d7a4f7ad6b26 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 Dec 2021 13:43:59 +0100 Subject: [PATCH 17/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 5db2df56de..25bd25537e 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -22,7 +22,7 @@ install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - conda config --set auto_update_conda false - conda config --add channels conda-forge --force - - conda install --yes charset-normalizer --force + - conda install --yes conda-forge/win-64::charset-normalizer --force - conda update --yes -n base conda - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 From f78c6acd35c5392e6d5457e0d49986cc27db4fc4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 Dec 2021 16:24:24 +0100 Subject: [PATCH 18/90] Update .appveyor.yml --- .appveyor.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 25bd25537e..230f67a654 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -22,7 +22,8 @@ install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - conda config --set auto_update_conda false - conda config --add channels conda-forge --force - - conda install --yes conda-forge/win-64::charset-normalizer --force + - conda install conda=4.10.3 + - conda install --yes charset-normalizer --force - conda update --yes -n base conda - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 From 64487cc5de356944122140c2f1457af8d62cf896 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 Dec 2021 17:34:50 +0100 Subject: [PATCH 19/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 230f67a654..3d114838da 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -22,7 +22,7 @@ install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - conda config --set auto_update_conda false - conda config --add channels conda-forge --force - - conda install conda=4.10.3 + - conda install --yes conda=4.10.3 --force - conda install --yes charset-normalizer --force - conda update --yes -n base conda - conda install --yes --quiet flang jom From 5a585d7f6afd03aa86f39a293487286d75fdd0be Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 Dec 2021 20:27:21 +0100 Subject: [PATCH 20/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 3d114838da..3f1381df57 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -22,7 +22,7 @@ install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - conda config --set auto_update_conda false - conda config --add channels conda-forge --force - - conda install --yes conda=4.10.3 --force + - conda install --yes conda=4.3.8 --force - conda install --yes charset-normalizer --force - conda update --yes -n base conda - conda install --yes --quiet flang jom From 9c6729e351a5f9515e201b5441d093408f4cda85 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 Dec 2021 21:35:57 +0100 Subject: [PATCH 21/90] Update .appveyor.yml --- .appveyor.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 3f1381df57..78a665eb33 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -20,11 +20,11 @@ environment: install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - - conda config --set auto_update_conda false +# - conda config --set auto_update_conda false - conda config --add channels conda-forge --force - - conda install --yes conda=4.3.8 --force - - conda install --yes charset-normalizer --force - - conda update --yes -n base conda +# - conda install --yes conda=4.3.8 --force +# - conda install --yes charset-normalizer --force +# - conda update --yes -n base conda - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" From 83f13fb2c7fe20d64fabc29de804ca599c7127fa Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 28 Dec 2021 19:28:30 +0100 Subject: [PATCH 22/90] Update .appveyor.yml --- .appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.appveyor.yml b/.appveyor.yml index 78a665eb33..69346d9921 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -22,6 +22,7 @@ install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false - conda config --add channels conda-forge --force + - conda clean --all --yes # - conda install --yes conda=4.3.8 --force # - conda install --yes charset-normalizer --force # - conda update --yes -n base conda From 1acee2241ac1a7545d00d350d734d15331f5eade Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 28 Dec 2021 19:43:01 +0100 Subject: [PATCH 23/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 69346d9921..2996701e01 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -24,7 +24,7 @@ install: - conda config --add channels conda-forge --force - conda clean --all --yes # - conda install --yes conda=4.3.8 --force -# - conda install --yes charset-normalizer --force + - conda install --yes -c conda-forge charset-normalizer=2.0.5 --force # - conda update --yes -n base conda - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 From 22d172188ce2c1cab440ea4568b5a061a46e557b Mon Sep 17 00:00:00 2001 From: Angelika Schwarz Date: Sun, 19 Jun 2022 22:51:43 +0100 Subject: [PATCH 24/90] Add missing numerical tests for TREVC3 At least some tests, though there are still code paths that are not covered * input sizes defined in nep.in are small * RWORK in [CZ]TREVC3 is de factor defined as N-vector from the input file and limits the blocked computation --- TESTING/EIG/cchkhs.f | 81 +++++++++++++++++++++++++++++++++++++++---- TESTING/EIG/dchkhs.f | 82 ++++++++++++++++++++++++++++++++++++++++---- TESTING/EIG/schkhs.f | 79 ++++++++++++++++++++++++++++++++++++++---- TESTING/EIG/zchkhs.f | 79 +++++++++++++++++++++++++++++++++++++++--- 4 files changed, 297 insertions(+), 24 deletions(-) diff --git a/TESTING/EIG/cchkhs.f b/TESTING/EIG/cchkhs.f index 65f1fc82d4..6c6430d5f8 100644 --- a/TESTING/EIG/cchkhs.f +++ b/TESTING/EIG/cchkhs.f @@ -21,7 +21,7 @@ * .. Array Arguments .. * LOGICAL DOTYPE( * ), SELECT( * ) * INTEGER ISEED( 4 ), IWORK( * ), NN( * ) -* REAL RESULT( 14 ), RWORK( * ) +* REAL RESULT( 16 ), RWORK( * ) * COMPLEX A( LDA, * ), EVECTL( LDU, * ), * $ EVECTR( LDU, * ), EVECTX( LDU, * ), * $ EVECTY( LDU, * ), H( LDA, * ), T1( LDA, * ), @@ -64,10 +64,15 @@ *> eigenvectors of H. Y is lower triangular, and X is *> upper triangular. *> +*> CTREVC3 computes left and right eigenvector matrices +*> from a Schur matrix T and backtransforms them with Z +*> to eigenvector matrices L and R for A. L and R are +*> GE matrices. +*> *> When CCHKHS is called, a number of matrix "sizes" ("n's") and a *> number of matrix "types" are specified. For each size ("n") *> and each type of matrix, one matrix will be generated and used -*> to test the nonsymmetric eigenroutines. For each matrix, 14 +*> to test the nonsymmetric eigenroutines. For each matrix, 16 *> tests will be performed: *> *> (1) | A - U H U**H | / ( |A| n ulp ) @@ -98,6 +103,10 @@ *> *> (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) *> +*> (15) | AR - RW | / ( |A| |R| ulp ) +*> +*> (16) | LA - WL | / ( |A| |L| ulp ) +*> *> The "sizes" are specified by an array NN(1:NSIZES); the value of *> each element NN(j) specifies one size. *> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); @@ -331,7 +340,7 @@ *> Workspace. Could be equivalenced to IWORK, but not RWORK. *> Modified. *> -*> RESULT - REAL array, dimension (14) +*> RESULT - REAL array, dimension (16) *> The values computed by the fourteen tests described above. *> The values are currently limited to 1/ulp, to avoid *> overflow. @@ -421,7 +430,7 @@ SUBROUTINE CCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * .. Array Arguments .. LOGICAL DOTYPE( * ), SELECT( * ) INTEGER ISEED( 4 ), IWORK( * ), NN( * ) - REAL RESULT( 14 ), RWORK( * ) + REAL RESULT( 16 ), RWORK( * ) COMPLEX A( LDA, * ), EVECTL( LDU, * ), $ EVECTR( LDU, * ), EVECTX( LDU, * ), $ EVECTY( LDU, * ), H( LDA, * ), T1( LDA, * ), @@ -463,8 +472,8 @@ SUBROUTINE CCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * .. External Subroutines .. EXTERNAL CCOPY, CGEHRD, CGEMM, CGET10, CGET22, CHSEIN, $ CHSEQR, CHST01, CLACPY, CLASET, CLATME, CLATMR, - $ CLATMS, CTREVC, CUNGHR, CUNMHR, SLABAD, SLAFTS, - $ SLASUM, XERBLA + $ CLATMS, CTREVC, CTREVC3, CUNGHR, CUNMHR, + $ SLABAD, SLAFTS, SLASUM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, REAL, SQRT @@ -1067,6 +1076,66 @@ SUBROUTINE CCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, $ RESULT( 14 ) = DUMMA( 3 )*ANINV END IF * +* Compute Left and Right Eigenvectors of A +* +* Compute a Right eigenvector matrix: +* + NTEST = 15 + RESULT( 15 ) = ULPINV +* + CALL CLACPY( ' ', N, N, UZ, LDU, EVECTR, LDU ) +* + CALL CTREVC3( 'Right', 'Back', SELECT, N, T1, LDA, CDUMMA, + $ LDU, EVECTR, LDU, N, IN, WORK, NWORK, RWORK, + $ N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CTREVC3(R,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 15: | AR - RW | / ( |A| |R| ulp ) +* +* (from Schur decomposition) +* + CALL CGET22( 'N', 'N', 'N', N, A, LDA, EVECTR, LDU, W1, + $ WORK, RWORK, DUMMA( 1 ) ) + RESULT( 15 ) = DUMMA( 1 ) + IF( DUMMA( 2 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Right', 'CTREVC3', + $ DUMMA( 2 ), N, JTYPE, IOLDSD + END IF +* +* Compute a Left eigenvector matrix: +* + NTEST = 16 + RESULT( 16 ) = ULPINV +* + CALL CLACPY( ' ', N, N, UZ, LDU, EVECTL, LDU ) +* + CALL CTREVC3( 'Left', 'Back', SELECT, N, T1, LDA, EVECTL, + $ LDU, CDUMMA, LDU, N, IN, WORK, NWORK, RWORK, + $ N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CTREVC3(L,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 16: | LA - WL | / ( |A| |L| ulp ) +* +* (from Schur decomposition) +* + CALL CGET22( 'Conj', 'N', 'Conj', N, A, LDA, EVECTL, LDU, + $ W1, WORK, RWORK, DUMMA( 3 ) ) + RESULT( 16 ) = DUMMA( 3 ) + IF( DUMMA( 4 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Left', 'CTREVC3', DUMMA( 4 ), + $ N, JTYPE, IOLDSD + END IF +* * End of Loop -- Check for RESULT(j) > THRESH * 240 CONTINUE diff --git a/TESTING/EIG/dchkhs.f b/TESTING/EIG/dchkhs.f index 2e57498965..79ba960086 100644 --- a/TESTING/EIG/dchkhs.f +++ b/TESTING/EIG/dchkhs.f @@ -23,7 +23,7 @@ * INTEGER ISEED( 4 ), IWORK( * ), NN( * ) * DOUBLE PRECISION A( LDA, * ), EVECTL( LDU, * ), * $ EVECTR( LDU, * ), EVECTX( LDU, * ), -* $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 14 ), +* $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 16 ), * $ T1( LDA, * ), T2( LDA, * ), TAU( * ), * $ U( LDU, * ), UU( LDU, * ), UZ( LDU, * ), * $ WI1( * ), WI2( * ), WI3( * ), WORK( * ), @@ -49,15 +49,21 @@ *> T is "quasi-triangular", and the eigenvalue vector W. *> *> DTREVC computes the left and right eigenvector matrices -*> L and R for T. +*> L and R for T. L is lower quasi-triangular, and R is +*> upper quasi-triangular. *> *> DHSEIN computes the left and right eigenvector matrices *> Y and X for H, using inverse iteration. *> +*> DTREVC3 computes left and right eigenvector matrices +*> from a Schur matrix T and backtransforms them with Z +*> to eigenvector matrices L and R for A. L and R are +*> GE matrices. +*> *> When DCHKHS is called, a number of matrix "sizes" ("n's") and a *> number of matrix "types" are specified. For each size ("n") *> and each type of matrix, one matrix will be generated and used -*> to test the nonsymmetric eigenroutines. For each matrix, 14 +*> to test the nonsymmetric eigenroutines. For each matrix, 16 *> tests will be performed: *> *> (1) | A - U H U**T | / ( |A| n ulp ) @@ -88,6 +94,10 @@ *> *> (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) *> +*> (15) | AR - RW | / ( |A| |R| ulp ) +*> +*> (16) | LA - WL | / ( |A| |L| ulp ) +*> *> The "sizes" are specified by an array NN(1:NSIZES); the value of *> each element NN(j) specifies one size. *> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); @@ -331,7 +341,7 @@ *> Workspace. *> Modified. *> -*> RESULT - DOUBLE PRECISION array, dimension (14) +*> RESULT - DOUBLE PRECISION array, dimension (16) *> The values computed by the fourteen tests described above. *> The values are currently limited to 1/ulp, to avoid *> overflow. @@ -423,7 +433,7 @@ SUBROUTINE DCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, INTEGER ISEED( 4 ), IWORK( * ), NN( * ) DOUBLE PRECISION A( LDA, * ), EVECTL( LDU, * ), $ EVECTR( LDU, * ), EVECTX( LDU, * ), - $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 14 ), + $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 16 ), $ T1( LDA, * ), T2( LDA, * ), TAU( * ), $ U( LDU, * ), UU( LDU, * ), UZ( LDU, * ), $ WI1( * ), WI2( * ), WI3( * ), WORK( * ), @@ -461,7 +471,7 @@ SUBROUTINE DCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, EXTERNAL DCOPY, DGEHRD, DGEMM, DGET10, DGET22, DHSEIN, $ DHSEQR, DHST01, DLABAD, DLACPY, DLAFTS, DLASET, $ DLASUM, DLATME, DLATMR, DLATMS, DORGHR, DORMHR, - $ DTREVC, XERBLA + $ DTREVC, DTREVC3, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, SQRT @@ -561,7 +571,7 @@ SUBROUTINE DCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * Initialize RESULT * - DO 30 J = 1, 14 + DO 30 J = 1, 16 RESULT( J ) = ZERO 30 CONTINUE * @@ -1108,6 +1118,64 @@ SUBROUTINE DCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, $ RESULT( 14 ) = DUMMA( 3 )*ANINV END IF * +* Compute Left and Right Eigenvectors of A +* +* Compute a Right eigenvector matrix: +* + NTEST = 15 + RESULT( 15 ) = ULPINV +* + CALL DLACPY( ' ', N, N, UZ, LDU, EVECTR, LDU ) +* + CALL DTREVC3( 'Right', 'Back', SELECT, N, T1, LDA, DUMMA, + $ LDU, EVECTR, LDU, N, IN, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DTREVC3(R,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 15: | AR - RW | / ( |A| |R| ulp ) +* +* (from Schur decomposition) +* + CALL DGET22( 'N', 'N', 'N', N, A, LDA, EVECTR, LDU, WR1, + $ WI1, WORK, DUMMA( 1 ) ) + RESULT( 15 ) = DUMMA( 1 ) + IF( DUMMA( 2 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Right', 'DTREVC3', + $ DUMMA( 2 ), N, JTYPE, IOLDSD + END IF +* +* Compute a Left eigenvector matrix: +* + NTEST = 16 + RESULT( 16 ) = ULPINV +* + CALL DLACPY( ' ', N, N, UZ, LDU, EVECTL, LDU ) +* + CALL DTREVC3( 'Left', 'Back', SELECT, N, T1, LDA, EVECTL, + $ LDU, DUMMA, LDU, N, IN, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DTREVC3(L,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 16: | LA - WL | / ( |A| |L| ulp ) +* +* (from Schur decomposition) +* + CALL DGET22( 'Trans', 'N', 'Conj', N, A, LDA, EVECTL, LDU, + $ WR1, WI1, WORK, DUMMA( 3 ) ) + RESULT( 16 ) = DUMMA( 3 ) + IF( DUMMA( 4 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Left', 'DTREVC3', DUMMA( 4 ), + $ N, JTYPE, IOLDSD + END IF +* * End of Loop -- Check for RESULT(j) > THRESH * 250 CONTINUE diff --git a/TESTING/EIG/schkhs.f b/TESTING/EIG/schkhs.f index ab0e901383..bf8eb1b409 100644 --- a/TESTING/EIG/schkhs.f +++ b/TESTING/EIG/schkhs.f @@ -23,7 +23,7 @@ * INTEGER ISEED( 4 ), IWORK( * ), NN( * ) * REAL A( LDA, * ), EVECTL( LDU, * ), * $ EVECTR( LDU, * ), EVECTX( LDU, * ), -* $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 14 ), +* $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 16 ), * $ T1( LDA, * ), T2( LDA, * ), TAU( * ), * $ U( LDU, * ), UU( LDU, * ), UZ( LDU, * ), * $ WI1( * ), WI2( * ), WI3( * ), WORK( * ), @@ -54,10 +54,15 @@ *> SHSEIN computes the left and right eigenvector matrices *> Y and X for H, using inverse iteration. *> +*> STREVC3 computes left and right eigenvector matrices +*> from a Schur matrix T and backtransforms them with Z +*> to eigenvector matrices L and R for A. L and R are +*> GE matrices. +*> *> When SCHKHS is called, a number of matrix "sizes" ("n's") and a *> number of matrix "types" are specified. For each size ("n") *> and each type of matrix, one matrix will be generated and used -*> to test the nonsymmetric eigenroutines. For each matrix, 14 +*> to test the nonsymmetric eigenroutines. For each matrix, 16 *> tests will be performed: *> *> (1) | A - U H U**T | / ( |A| n ulp ) @@ -88,6 +93,10 @@ *> *> (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) *> +*> (15) | AR - RW | / ( |A| |R| ulp ) +*> +*> (16) | LA - WL | / ( |A| |L| ulp ) +*> *> The "sizes" are specified by an array NN(1:NSIZES); the value of *> each element NN(j) specifies one size. *> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); @@ -331,7 +340,7 @@ *> Workspace. *> Modified. *> -*> RESULT - REAL array, dimension (14) +*> RESULT - REAL array, dimension (16) *> The values computed by the fourteen tests described above. *> The values are currently limited to 1/ulp, to avoid *> overflow. @@ -423,7 +432,7 @@ SUBROUTINE SCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, INTEGER ISEED( 4 ), IWORK( * ), NN( * ) REAL A( LDA, * ), EVECTL( LDU, * ), $ EVECTR( LDU, * ), EVECTX( LDU, * ), - $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 14 ), + $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 16 ), $ T1( LDA, * ), T2( LDA, * ), TAU( * ), $ U( LDU, * ), UU( LDU, * ), UZ( LDU, * ), $ WI1( * ), WI2( * ), WI3( * ), WORK( * ), @@ -461,7 +470,7 @@ SUBROUTINE SCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, EXTERNAL SCOPY, SGEHRD, SGEMM, SGET10, SGET22, SHSEIN, $ SHSEQR, SHST01, SLABAD, SLACPY, SLAFTS, SLASET, $ SLASUM, SLATME, SLATMR, SLATMS, SORGHR, SORMHR, - $ STREVC, XERBLA + $ STREVC, STREVC3, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, REAL, SQRT @@ -561,7 +570,7 @@ SUBROUTINE SCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * Initialize RESULT * - DO 30 J = 1, 14 + DO 30 J = 1, 16 RESULT( J ) = ZERO 30 CONTINUE * @@ -1108,6 +1117,64 @@ SUBROUTINE SCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, $ RESULT( 14 ) = DUMMA( 3 )*ANINV END IF * +* Compute Left and Right Eigenvectors of A +* +* Compute a Right eigenvector matrix: +* + NTEST = 15 + RESULT( 15 ) = ULPINV +* + CALL SLACPY( ' ', N, N, UZ, LDU, EVECTR, LDU ) +* + CALL STREVC3( 'Right', 'Back', SELECT, N, T1, LDA, DUMMA, + $ LDU, EVECTR, LDU, N, IN, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'STREVC3(R,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 15: | AR - RW | / ( |A| |R| ulp ) +* +* (from Schur decomposition) +* + CALL SGET22( 'N', 'N', 'N', N, A, LDA, EVECTR, LDU, WR1, + $ WI1, WORK, DUMMA( 1 ) ) + RESULT( 15 ) = DUMMA( 1 ) + IF( DUMMA( 2 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Right', 'STREVC3', + $ DUMMA( 2 ), N, JTYPE, IOLDSD + END IF +* +* Compute a Left eigenvector matrix: +* + NTEST = 16 + RESULT( 16 ) = ULPINV +* + CALL SLACPY( ' ', N, N, UZ, LDU, EVECTL, LDU ) +* + CALL STREVC3( 'Left', 'Back', SELECT, N, T1, LDA, EVECTL, + $ LDU, DUMMA, LDU, N, IN, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'STREVC3(L,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 16: | LA - WL | / ( |A| |L| ulp ) +* +* (from Schur decomposition) +* + CALL SGET22( 'Trans', 'N', 'Conj', N, A, LDA, EVECTL, LDU, + $ WR1, WI1, WORK, DUMMA( 3 ) ) + RESULT( 16 ) = DUMMA( 3 ) + IF( DUMMA( 4 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Left', 'STREVC3', DUMMA( 4 ), + $ N, JTYPE, IOLDSD + END IF +* * End of Loop -- Check for RESULT(j) > THRESH * 250 CONTINUE diff --git a/TESTING/EIG/zchkhs.f b/TESTING/EIG/zchkhs.f index 52962a0414..f5ae9b7f3c 100644 --- a/TESTING/EIG/zchkhs.f +++ b/TESTING/EIG/zchkhs.f @@ -21,7 +21,7 @@ * .. Array Arguments .. * LOGICAL DOTYPE( * ), SELECT( * ) * INTEGER ISEED( 4 ), IWORK( * ), NN( * ) -* DOUBLE PRECISION RESULT( 14 ), RWORK( * ) +* DOUBLE PRECISION RESULT( 16 ), RWORK( * ) * COMPLEX*16 A( LDA, * ), EVECTL( LDU, * ), * $ EVECTR( LDU, * ), EVECTX( LDU, * ), * $ EVECTY( LDU, * ), H( LDA, * ), T1( LDA, * ), @@ -64,10 +64,15 @@ *> eigenvectors of H. Y is lower triangular, and X is *> upper triangular. *> +*> ZTREVC3 computes left and right eigenvector matrices +*> from a Schur matrix T and backtransforms them with Z +*> to eigenvector matrices L and R for A. L and R are +*> GE matrices. +*> *> When ZCHKHS is called, a number of matrix "sizes" ("n's") and a *> number of matrix "types" are specified. For each size ("n") *> and each type of matrix, one matrix will be generated and used -*> to test the nonsymmetric eigenroutines. For each matrix, 14 +*> to test the nonsymmetric eigenroutines. For each matrix, 16 *> tests will be performed: *> *> (1) | A - U H U**H | / ( |A| n ulp ) @@ -98,6 +103,10 @@ *> *> (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) *> +*> (15) | AR - RW | / ( |A| |R| ulp ) +*> +*> (16) | LA - WL | / ( |A| |L| ulp ) +*> *> The "sizes" are specified by an array NN(1:NSIZES); the value of *> each element NN(j) specifies one size. *> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); @@ -331,7 +340,7 @@ *> Workspace. Could be equivalenced to IWORK, but not RWORK. *> Modified. *> -*> RESULT - DOUBLE PRECISION array, dimension (14) +*> RESULT - DOUBLE PRECISION array, dimension (16) *> The values computed by the fourteen tests described above. *> The values are currently limited to 1/ulp, to avoid *> overflow. @@ -421,7 +430,7 @@ SUBROUTINE ZCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * .. Array Arguments .. LOGICAL DOTYPE( * ), SELECT( * ) INTEGER ISEED( 4 ), IWORK( * ), NN( * ) - DOUBLE PRECISION RESULT( 14 ), RWORK( * ) + DOUBLE PRECISION RESULT( 16 ), RWORK( * ) COMPLEX*16 A( LDA, * ), EVECTL( LDU, * ), $ EVECTR( LDU, * ), EVECTX( LDU, * ), $ EVECTY( LDU, * ), H( LDA, * ), T1( LDA, * ), @@ -464,7 +473,7 @@ SUBROUTINE ZCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, EXTERNAL DLABAD, DLAFTS, DLASUM, XERBLA, ZCOPY, ZGEHRD, $ ZGEMM, ZGET10, ZGET22, ZHSEIN, ZHSEQR, ZHST01, $ ZLACPY, ZLASET, ZLATME, ZLATMR, ZLATMS, ZTREVC, - $ ZUNGHR, ZUNMHR + $ ZTREVC3, ZUNGHR, ZUNMHR * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, SQRT @@ -1067,6 +1076,66 @@ SUBROUTINE ZCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, $ RESULT( 14 ) = DUMMA( 3 )*ANINV END IF * +* Compute Left and Right Eigenvectors of A +* +* Compute a Right eigenvector matrix: +* + NTEST = 15 + RESULT( 15 ) = ULPINV +* + CALL ZLACPY( ' ', N, N, UZ, LDU, EVECTR, LDU ) +* + CALL ZTREVC3( 'Right', 'Back', SELECT, N, T1, LDA, CDUMMA, + $ LDU, EVECTR, LDU, N, IN, WORK, NWORK, RWORK, + $ N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZTREVC3(R,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 15: | AR - RW | / ( |A| |R| ulp ) +* +* (from Schur decomposition) +* + CALL ZGET22( 'N', 'N', 'N', N, A, LDA, EVECTR, LDU, W1, + $ WORK, RWORK, DUMMA( 1 ) ) + RESULT( 15 ) = DUMMA( 1 ) + IF( DUMMA( 2 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Right', 'ZTREVC3', + $ DUMMA( 2 ), N, JTYPE, IOLDSD + END IF +* +* Compute a Left eigenvector matrix: +* + NTEST = 16 + RESULT( 16 ) = ULPINV +* + CALL ZLACPY( ' ', N, N, UZ, LDU, EVECTL, LDU ) +* + CALL ZTREVC3( 'Left', 'Back', SELECT, N, T1, LDA, EVECTL, + $ LDU, CDUMMA, LDU, N, IN, WORK, NWORK, RWORK, + $ N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZTREVC3(L,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 16: | LA - WL | / ( |A| |L| ulp ) +* +* (from Schur decomposition) +* + CALL ZGET22( 'Conj', 'N', 'Conj', N, A, LDA, EVECTL, LDU, + $ W1, WORK, RWORK, DUMMA( 3 ) ) + RESULT( 16 ) = DUMMA( 3 ) + IF( DUMMA( 4 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Left', 'ZTREVC3', DUMMA( 4 ), + $ N, JTYPE, IOLDSD + END IF +* * End of Loop -- Check for RESULT(j) > THRESH * 240 CONTINUE From 9f9295f5729931a00dc5c3b93da3c8541584ab82 Mon Sep 17 00:00:00 2001 From: Angelika Schwarz Date: Sun, 3 Jul 2022 20:40:54 +0100 Subject: [PATCH 25/90] standardize style in laqr5 --- SRC/claqr5.f | 25 ++++++++++++++----------- SRC/dlaqr5.f | 23 ++++++++++++++--------- SRC/slaqr5.f | 23 ++++++++++++++--------- SRC/zlaqr5.f | 25 ++++++++++++++----------- 4 files changed, 56 insertions(+), 40 deletions(-) diff --git a/SRC/claqr5.f b/SRC/claqr5.f index 0a01cc2265..4e6f43a73d 100644 --- a/SRC/claqr5.f +++ b/SRC/claqr5.f @@ -533,11 +533,13 @@ SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, * . Mth bulge. Exploit fact that first two elements * . of row are actually zero. ==== * - REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) - H( K+3, K ) = -REFSUM - H( K+3, K+1 ) = -REFSUM*CONJG( V( 2, M ) ) - H( K+3, K+2 ) = H( K+3, K+2 ) - - $ REFSUM*CONJG( V( 3, M ) ) + T1 = V( 1, M ) + T2 = T1*CONJG( V( 2, M ) ) + T3 = T1*CONJG( V( 3, M ) ) + REFSUM = V( 3, M )*H( K+3, K+2 ) + H( K+3, K ) = -REFSUM*T1 + H( K+3, K+1 ) = -REFSUM*T2 + H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*T3 * * ==== Calculate reflection to move * . Mth bulge one step. ==== @@ -572,12 +574,13 @@ SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, $ S( 2*M ), VT ) ALPHA = VT( 1 ) CALL CLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) - REFSUM = CONJG( VT( 1 ) )* - $ ( H( K+1, K )+CONJG( VT( 2 ) )* - $ H( K+2, K ) ) + T1 = CONJG( VT( 1 ) ) + T2 = T1*VT( 2 ) + T3 = T1*VT( 3 ) + REFSUM = H( K+1, K )+CONJG( VT( 2 ) )*H( K+2, K ) * - IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+ - $ CABS1( REFSUM*VT( 3 ) ).GT.ULP* + IF( CABS1( H( K+2, K )-REFSUM*T2 )+ + $ CABS1( REFSUM*T3 ).GT.ULP* $ ( CABS1( H( K, K ) )+CABS1( H( K+1, $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN * @@ -595,7 +598,7 @@ SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, * . Replace the old reflector with * . the new one. ==== * - H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+1, K ) = H( K+1, K ) - REFSUM*T1 H( K+2, K ) = ZERO H( K+3, K ) = ZERO V( 1, M ) = VT( 1 ) diff --git a/SRC/dlaqr5.f b/SRC/dlaqr5.f index 43b4ac72a3..cc94b12223 100644 --- a/SRC/dlaqr5.f +++ b/SRC/dlaqr5.f @@ -558,10 +558,13 @@ SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * . Mth bulge. Exploit fact that first two elements * . of row are actually zero. ==== * - REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) - H( K+3, K ) = -REFSUM - H( K+3, K+1 ) = -REFSUM*V( 2, M ) - H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*V( 3, M ) + T1 = V( 1, M ) + T2 = T1*V( 2, M ) + T3 = T1*V( 3, M ) + REFSUM = V( 3, M )*H( K+3, K+2 ) + H( K+3, K ) = -REFSUM*T1 + H( K+3, K+1 ) = -REFSUM*T2 + H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*T3 * * ==== Calculate reflection to move * . Mth bulge one step. ==== @@ -597,11 +600,13 @@ SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, $ VT ) ALPHA = VT( 1 ) CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) - REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* - $ H( K+2, K ) ) + T1 = VT( 1 ) + T2 = T1*VT( 2 ) + T3 = T1*VT( 3 ) + REFSUM = H( K+1, K ) + VT( 2 )*H( K+2, K ) * - IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ - $ ABS( REFSUM*VT( 3 ) ).GT.ULP* + IF( ABS( H( K+2, K )-REFSUM*T2 )+ + $ ABS( REFSUM*T3 ).GT.ULP* $ ( ABS( H( K, K ) )+ABS( H( K+1, $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN * @@ -619,7 +624,7 @@ SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * . Replace the old reflector with * . the new one. ==== * - H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+1, K ) = H( K+1, K ) - REFSUM*T1 H( K+2, K ) = ZERO H( K+3, K ) = ZERO V( 1, M ) = VT( 1 ) diff --git a/SRC/slaqr5.f b/SRC/slaqr5.f index a4f805674d..b10e597542 100644 --- a/SRC/slaqr5.f +++ b/SRC/slaqr5.f @@ -558,10 +558,13 @@ SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * . Mth bulge. Exploit fact that first two elements * . of row are actually zero. ==== * - REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) - H( K+3, K ) = -REFSUM - H( K+3, K+1 ) = -REFSUM*V( 2, M ) - H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*V( 3, M ) + T1 = V( 1, M ) + T2 = T1*V( 2, M ) + T3 = T1*V( 3, M ) + REFSUM = V( 3, M )*H( K+3, K+2 ) + H( K+3, K ) = -REFSUM*T1 + H( K+3, K+1 ) = -REFSUM*T2 + H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*T3 * * ==== Calculate reflection to move * . Mth bulge one step. ==== @@ -597,11 +600,13 @@ SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, $ VT ) ALPHA = VT( 1 ) CALL SLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) - REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* - $ H( K+2, K ) ) + T1 = VT( 1 ) + T2 = T1*VT( 2 ) + T3 = T2*VT( 3 ) + REFSUM = H( K+1, K )+VT( 2 )*H( K+2, K ) * - IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ - $ ABS( REFSUM*VT( 3 ) ).GT.ULP* + IF( ABS( H( K+2, K )-REFSUM*T2 )+ + $ ABS( REFSUM*T3 ).GT.ULP* $ ( ABS( H( K, K ) )+ABS( H( K+1, $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN * @@ -619,7 +624,7 @@ SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * . Replace the old reflector with * . the new one. ==== * - H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+1, K ) = H( K+1, K ) - REFSUM*T1 H( K+2, K ) = ZERO H( K+3, K ) = ZERO V( 1, M ) = VT( 1 ) diff --git a/SRC/zlaqr5.f b/SRC/zlaqr5.f index 4fa5ee5b06..d8c521349e 100644 --- a/SRC/zlaqr5.f +++ b/SRC/zlaqr5.f @@ -533,11 +533,13 @@ SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, * . Mth bulge. Exploit fact that first two elements * . of row are actually zero. ==== * - REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) - H( K+3, K ) = -REFSUM - H( K+3, K+1 ) = -REFSUM*DCONJG( V( 2, M ) ) - H( K+3, K+2 ) = H( K+3, K+2 ) - - $ REFSUM*DCONJG( V( 3, M ) ) + T1 = V( 1, M ) + T2 = T1*DCONJG( V( 2, M ) ) + T3 = T1*DCONJG( V( 3, M ) ) + REFSUM = V( 3, M )*H( K+3, K+2 ) + H( K+3, K ) = -REFSUM*T1 + H( K+3, K+1 ) = -REFSUM*T2 + H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*T3 * * ==== Calculate reflection to move * . Mth bulge one step. ==== @@ -572,12 +574,13 @@ SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, $ S( 2*M ), VT ) ALPHA = VT( 1 ) CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) - REFSUM = DCONJG( VT( 1 ) )* - $ ( H( K+1, K )+DCONJG( VT( 2 ) )* - $ H( K+2, K ) ) + T1 = DCONJG( VT( 1 ) ) + T2 = T1*VT( 2 ) + T3 = T1*VT( 3 ) + REFSUM = H( K+1, K )+DCONJG( VT( 2 ) )*H( K+2, K ) * - IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+ - $ CABS1( REFSUM*VT( 3 ) ).GT.ULP* + IF( CABS1( H( K+2, K )-REFSUM*T2 )+ + $ CABS1( REFSUM*T3 ).GT.ULP* $ ( CABS1( H( K, K ) )+CABS1( H( K+1, $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN * @@ -595,7 +598,7 @@ SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, * . Replace the old reflector with * . the new one. ==== * - H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+1, K ) = H( K+1, K ) - REFSUM*T1 H( K+2, K ) = ZERO H( K+3, K ) = ZERO V( 1, M ) = VT( 1 ) From 0a6cd431891bbb9cef57e461e023ac7f0c9e5d8e Mon Sep 17 00:00:00 2001 From: Angelika Schwarz Date: Tue, 5 Jul 2022 19:34:23 +0100 Subject: [PATCH 26/90] Rewrite [ds]hgeqz to use FMA with Householder reflectors --- SRC/dhgeqz.f | 70 +++++++++++++++++++++++++++------------------------- SRC/shgeqz.f | 70 +++++++++++++++++++++++++++------------------------- 2 files changed, 74 insertions(+), 66 deletions(-) diff --git a/SRC/dhgeqz.f b/SRC/dhgeqz.f index 3fe2a083c8..5f69563eeb 100644 --- a/SRC/dhgeqz.f +++ b/SRC/dhgeqz.f @@ -337,9 +337,9 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, - $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, - $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, - $ WR2 + $ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, + $ U12, U12L, U2, ULP, VS, W11, W12, W21, W22, + $ WABS, WI, WR, WR2 * .. * .. Local Arrays .. DOUBLE PRECISION V( 3 ) @@ -1132,25 +1132,27 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, H( J+2, J-1 ) = ZERO END IF * + T2 = TAU*V( 2 ) + T3 = TAU*V( 3 ) DO 230 JC = J, ILASTM - TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* - $ H( J+2, JC ) ) - H( J, JC ) = H( J, JC ) - TEMP - H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) - H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) - TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* - $ T( J+2, JC ) ) - T( J, JC ) = T( J, JC ) - TEMP2 - T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) - T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) + TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* + $ H( J+2, JC ) + H( J, JC ) = H( J, JC ) - TEMP*TAU + H( J+1, JC ) = H( J+1, JC ) - TEMP*T2 + H( J+2, JC ) = H( J+2, JC ) - TEMP*T3 + TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* + $ T( J+2, JC ) + T( J, JC ) = T( J, JC ) - TEMP2*TAU + T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2 + T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3 230 CONTINUE IF( ILQ ) THEN DO 240 JR = 1, N - TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* - $ Q( JR, J+2 ) ) - Q( JR, J ) = Q( JR, J ) - TEMP - Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) - Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) + Q( JR, J ) = Q( JR, J ) - TEMP*TAU + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2 + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3 240 CONTINUE END IF * @@ -1238,27 +1240,29 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, * * Apply transformations from the right. * + T2 = TAU*V(2) + T3 = TAU*V(3) DO 260 JR = IFRSTM, MIN( J+3, ILAST ) - TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* - $ H( JR, J+2 ) ) - H( JR, J ) = H( JR, J ) - TEMP - H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) - H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) + TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* + $ H( JR, J+2 ) + H( JR, J ) = H( JR, J ) - TEMP*TAU + H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2 + H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3 260 CONTINUE DO 270 JR = IFRSTM, J + 2 - TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* - $ T( JR, J+2 ) ) - T( JR, J ) = T( JR, J ) - TEMP - T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) - T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) + TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* + $ T( JR, J+2 ) + T( JR, J ) = T( JR, J ) - TEMP*TAU + T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2 + T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3 270 CONTINUE IF( ILZ ) THEN DO 280 JR = 1, N - TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* - $ Z( JR, J+2 ) ) - Z( JR, J ) = Z( JR, J ) - TEMP - Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) - Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) + Z( JR, J ) = Z( JR, J ) - TEMP*TAU + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2 + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3 280 CONTINUE END IF T( J+1, J ) = ZERO diff --git a/SRC/shgeqz.f b/SRC/shgeqz.f index 79a9c60925..635bb8b20c 100644 --- a/SRC/shgeqz.f +++ b/SRC/shgeqz.f @@ -337,9 +337,9 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, - $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, - $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, - $ WR2 + $ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, + $ U12, U12L, U2, ULP, VS, W11, W12, W21, W22, + $ WABS, WI, WR, WR2 * .. * .. Local Arrays .. REAL V( 3 ) @@ -1132,25 +1132,27 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, H( J+2, J-1 ) = ZERO END IF * + T2 = TAU * V( 2 ) + T3 = TAU * V( 3 ) DO 230 JC = J, ILASTM - TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* - $ H( J+2, JC ) ) - H( J, JC ) = H( J, JC ) - TEMP - H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) - H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) - TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* - $ T( J+2, JC ) ) - T( J, JC ) = T( J, JC ) - TEMP2 - T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) - T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) + TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* + $ H( J+2, JC ) + H( J, JC ) = H( J, JC ) - TEMP*TAU + H( J+1, JC ) = H( J+1, JC ) - TEMP*T2 + H( J+2, JC ) = H( J+2, JC ) - TEMP*T3 + TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* + $ T( J+2, JC ) + T( J, JC ) = T( J, JC ) - TEMP2*TAU + T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2 + T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3 230 CONTINUE IF( ILQ ) THEN DO 240 JR = 1, N - TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* - $ Q( JR, J+2 ) ) - Q( JR, J ) = Q( JR, J ) - TEMP - Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) - Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) + Q( JR, J ) = Q( JR, J ) - TEMP*TAU + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2 + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3 240 CONTINUE END IF * @@ -1238,27 +1240,29 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, * * Apply transformations from the right. * + T2 = TAU*V( 2 ) + T3 = TAU*V( 3 ) DO 260 JR = IFRSTM, MIN( J+3, ILAST ) - TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* - $ H( JR, J+2 ) ) - H( JR, J ) = H( JR, J ) - TEMP - H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) - H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) + TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* + $ H( JR, J+2 ) + H( JR, J ) = H( JR, J ) - TEMP*TAU + H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2 + H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3 260 CONTINUE DO 270 JR = IFRSTM, J + 2 - TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* - $ T( JR, J+2 ) ) - T( JR, J ) = T( JR, J ) - TEMP - T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) - T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) + TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* + $ T( JR, J+2 ) + T( JR, J ) = T( JR, J ) - TEMP*TAU + T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2 + T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3 270 CONTINUE IF( ILZ ) THEN DO 280 JR = 1, N - TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* - $ Z( JR, J+2 ) ) - Z( JR, J ) = Z( JR, J ) - TEMP - Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) - Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) + Z( JR, J ) = Z( JR, J ) - TEMP*TAU + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2 + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3 280 CONTINUE END IF T( J+1, J ) = ZERO From 64a0db9b190a685807574a7eb912156263840cb7 Mon Sep 17 00:00:00 2001 From: Weslley da Silva Pereira Date: Mon, 10 Oct 2022 13:51:07 -0600 Subject: [PATCH 27/90] Applies fixes thanks to @angsch --- LAPACKE/src/lapacke_dtpmqrt_work.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/LAPACKE/src/lapacke_dtpmqrt_work.c b/LAPACKE/src/lapacke_dtpmqrt_work.c index 0b5b06e87f..7eb24a079b 100644 --- a/LAPACKE/src/lapacke_dtpmqrt_work.c +++ b/LAPACKE/src/lapacke_dtpmqrt_work.c @@ -49,8 +49,8 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { lapack_int nrowsA, ncolsA, nrowsV; - if ( side == 'L' ) { nrowsA = k; ncolsA = n; nrowsV = m; } - else if ( side == 'R' ) { nrowsA = m; ncolsA = k; nrowsV = n; } + if ( side == LAPACKE_lsame(side, 'l') ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == LAPACKE_lsame(side, 'r') ) { nrowsA = m; ncolsA = k; nrowsV = n; } else { info = -2; LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); @@ -118,8 +118,8 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_dge_trans( LAPACK_COL_MAJOR, ncolsA, nrowsA, a_t, lda_t, a, lda ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, m, b_t, ldb_t, b, ldb ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrowsA, ncolsA, a_t, lda_t, a, lda ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); exit_level_3: From c9e012152f028190b1bf0f7c601d4e1d16135f73 Mon Sep 17 00:00:00 2001 From: Weslley da Silva Pereira Date: Wed, 12 Oct 2022 13:14:21 -0600 Subject: [PATCH 28/90] Apply the fixes on lapacke_(c,s,z)tpmqrt_work.c --- LAPACKE/src/lapacke_ctpmqrt_work.c | 32 +++++++++++++++++++----------- LAPACKE/src/lapacke_dtpmqrt_work.c | 2 +- LAPACKE/src/lapacke_stpmqrt_work.c | 32 +++++++++++++++++++----------- LAPACKE/src/lapacke_ztpmqrt_work.c | 32 +++++++++++++++++++----------- 4 files changed, 61 insertions(+), 37 deletions(-) diff --git a/LAPACKE/src/lapacke_ctpmqrt_work.c b/LAPACKE/src/lapacke_ctpmqrt_work.c index 5ec948e7bb..e01664bdf8 100644 --- a/LAPACKE/src/lapacke_ctpmqrt_work.c +++ b/LAPACKE/src/lapacke_ctpmqrt_work.c @@ -50,16 +50,24 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,k); + lapack_int nrowsA, ncolsA, nrowsV; + if ( side == LAPACKE_lsame(side, 'l') ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == LAPACKE_lsame(side, 'r') ) { nrowsA = m; ncolsA = k; nrowsV = n; } + else { + info = -2; + LAPACKE_xerbla( "LAPACKE_ctpmqrt_work", info ); + return info; + } + lapack_int lda_t = MAX(1,nrowsA); lapack_int ldb_t = MAX(1,m); - lapack_int ldt_t = MAX(1,ldt); - lapack_int ldv_t = MAX(1,ldv); + lapack_int ldt_t = MAX(1,nb); + lapack_int ldv_t = MAX(1,nrowsV); lapack_complex_float* v_t = NULL; lapack_complex_float* t_t = NULL; lapack_complex_float* a_t = NULL; lapack_complex_float* b_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_ctpmqrt_work", info ); return info; @@ -69,7 +77,7 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_ctpmqrt_work", info ); return info; } - if( ldt < nb ) { + if( ldt < k ) { info = -12; LAPACKE_xerbla( "LAPACKE_ctpmqrt_work", info ); return info; @@ -87,13 +95,13 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_0; } t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,nb) ); + LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,k) ); if( t_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } a_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,m) ); + LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,ncolsA) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; @@ -105,10 +113,10 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_3; } /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_layout, ldv, k, v, ldv, v_t, ldv_t ); - LAPACKE_cge_trans( matrix_layout, ldt, nb, t, ldt, t_t, ldt_t ); - LAPACKE_cge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); - LAPACKE_cge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_cge_trans( LAPACK_ROW_MAJOR, nrowsV, k, v, ldv, v_t, ldv_t ); + LAPACKE_cge_trans( LAPACK_ROW_MAJOR, nb, k, t, ldt, t_t, ldt_t ); + LAPACKE_cge_trans( LAPACK_ROW_MAJOR, nrowsA, ncolsA, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( LAPACK_ROW_MAJOR, m, n, b, ldb, b_t, ldb_t ); /* Call LAPACK function and adjust info */ LAPACK_ctpmqrt( &side, &trans, &m, &n, &k, &l, &nb, v_t, &ldv_t, t_t, &ldt_t, a_t, &lda_t, b_t, &ldb_t, work, &info ); @@ -116,7 +124,7 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_cge_trans( LAPACK_COL_MAJOR, k, m, a_t, lda_t, a, lda ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrowsA, ncolsA, a_t, lda_t, a, lda ); LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); diff --git a/LAPACKE/src/lapacke_dtpmqrt_work.c b/LAPACKE/src/lapacke_dtpmqrt_work.c index 7eb24a079b..366acd3690 100644 --- a/LAPACKE/src/lapacke_dtpmqrt_work.c +++ b/LAPACKE/src/lapacke_dtpmqrt_work.c @@ -65,7 +65,7 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, double* a_t = NULL; double* b_t = NULL; /* Check leading dimension(s) */ - if ( lda < ncolsA ) { + if( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); return info; diff --git a/LAPACKE/src/lapacke_stpmqrt_work.c b/LAPACKE/src/lapacke_stpmqrt_work.c index 095fbdcd9f..c5a3a14965 100644 --- a/LAPACKE/src/lapacke_stpmqrt_work.c +++ b/LAPACKE/src/lapacke_stpmqrt_work.c @@ -48,16 +48,24 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,k); + lapack_int nrowsA, ncolsA, nrowsV; + if ( side == LAPACKE_lsame(side, 'l') ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == LAPACKE_lsame(side, 'r') ) { nrowsA = m; ncolsA = k; nrowsV = n; } + else { + info = -2; + LAPACKE_xerbla( "LAPACKE_stpmqrt_work", info ); + return info; + } + lapack_int lda_t = MAX(1,nrowsA); lapack_int ldb_t = MAX(1,m); - lapack_int ldt_t = MAX(1,ldt); - lapack_int ldv_t = MAX(1,ldv); + lapack_int ldt_t = MAX(1,nb); + lapack_int ldv_t = MAX(1,nrowsV); float* v_t = NULL; float* t_t = NULL; float* a_t = NULL; float* b_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_stpmqrt_work", info ); return info; @@ -67,7 +75,7 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_stpmqrt_work", info ); return info; } - if( ldt < nb ) { + if( ldt < k ) { info = -12; LAPACKE_xerbla( "LAPACKE_stpmqrt_work", info ); return info; @@ -83,12 +91,12 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; } - t_t = (float*)LAPACKE_malloc( sizeof(float) * ldt_t * MAX(1,nb) ); + t_t = (float*)LAPACKE_malloc( sizeof(float) * ldt_t * MAX(1,k) ); if( t_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } - a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) ); + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,ncolsA) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; @@ -99,10 +107,10 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_3; } /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_layout, ldv, k, v, ldv, v_t, ldv_t ); - LAPACKE_sge_trans( matrix_layout, ldt, nb, t, ldt, t_t, ldt_t ); - LAPACKE_sge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); - LAPACKE_sge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_sge_trans( LAPACK_ROW_MAJOR, nrowsV, k, v, ldv, v_t, ldv_t ); + LAPACKE_sge_trans( LAPACK_ROW_MAJOR, nb, k, t, ldt, t_t, ldt_t ); + LAPACKE_sge_trans( LAPACK_ROW_MAJOR, nrowsA, ncolsA, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( LAPACK_ROW_MAJOR, m, n, b, ldb, b_t, ldb_t ); /* Call LAPACK function and adjust info */ LAPACK_stpmqrt( &side, &trans, &m, &n, &k, &l, &nb, v_t, &ldv_t, t_t, &ldt_t, a_t, &lda_t, b_t, &ldb_t, work, &info ); @@ -110,7 +118,7 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_sge_trans( LAPACK_COL_MAJOR, k, m, a_t, lda_t, a, lda ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrowsA, ncolsA, a_t, lda_t, a, lda ); LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); diff --git a/LAPACKE/src/lapacke_ztpmqrt_work.c b/LAPACKE/src/lapacke_ztpmqrt_work.c index 643ae1d9d5..104efa8f3c 100644 --- a/LAPACKE/src/lapacke_ztpmqrt_work.c +++ b/LAPACKE/src/lapacke_ztpmqrt_work.c @@ -50,16 +50,24 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,k); + lapack_int nrowsA, ncolsA, nrowsV; + if ( side == LAPACKE_lsame(side, 'l') ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == LAPACKE_lsame(side, 'r') ) { nrowsA = m; ncolsA = k; nrowsV = n; } + else { + info = -2; + LAPACKE_xerbla( "LAPACKE_ztpmqrt_work", info ); + return info; + } + lapack_int lda_t = MAX(1,nrowsA); lapack_int ldb_t = MAX(1,m); - lapack_int ldt_t = MAX(1,ldt); - lapack_int ldv_t = MAX(1,ldv); + lapack_int ldt_t = MAX(1,nb); + lapack_int ldv_t = MAX(1,nrowsV); lapack_complex_double* v_t = NULL; lapack_complex_double* t_t = NULL; lapack_complex_double* a_t = NULL; lapack_complex_double* b_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_ztpmqrt_work", info ); return info; @@ -69,7 +77,7 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_ztpmqrt_work", info ); return info; } - if( ldt < nb ) { + if( ldt < k ) { info = -12; LAPACKE_xerbla( "LAPACKE_ztpmqrt_work", info ); return info; @@ -87,13 +95,13 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_0; } t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * ldt_t * MAX(1,nb) ); + LAPACKE_malloc( sizeof(lapack_complex_double) * ldt_t * MAX(1,k) ); if( t_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } a_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,m) ); + LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,ncolsA) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; @@ -105,10 +113,10 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_3; } /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_layout, ldv, k, v, ldv, v_t, ldv_t ); - LAPACKE_zge_trans( matrix_layout, ldt, nb, t, ldt, t_t, ldt_t ); - LAPACKE_zge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); - LAPACKE_zge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_zge_trans( LAPACK_ROW_MAJOR, nrowsV, k, v, ldv, v_t, ldv_t ); + LAPACKE_zge_trans( LAPACK_ROW_MAJOR, nb, k, t, ldt, t_t, ldt_t ); + LAPACKE_zge_trans( LAPACK_ROW_MAJOR, nrowsA, ncolsA, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( LAPACK_ROW_MAJOR, m, n, b, ldb, b_t, ldb_t ); /* Call LAPACK function and adjust info */ LAPACK_ztpmqrt( &side, &trans, &m, &n, &k, &l, &nb, v_t, &ldv_t, t_t, &ldt_t, a_t, &lda_t, b_t, &ldb_t, work, &info ); @@ -116,7 +124,7 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_zge_trans( LAPACK_COL_MAJOR, k, m, a_t, lda_t, a, lda ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrowsA, ncolsA, a_t, lda_t, a, lda ); LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); From 8371b0b9de6ad3ac101cdc20bc2ede215066fe77 Mon Sep 17 00:00:00 2001 From: Ikko Ashimine Date: Thu, 20 Oct 2022 01:16:46 +0900 Subject: [PATCH 29/90] Fix typo in ieeeck.f inifinity -> infinity --- SRC/ieeeck.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SRC/ieeeck.f b/SRC/ieeeck.f index 74065c3b4e..f9f6332ecf 100644 --- a/SRC/ieeeck.f +++ b/SRC/ieeeck.f @@ -41,7 +41,7 @@ *> \param[in] ISPEC *> \verbatim *> ISPEC is INTEGER -*> Specifies whether to test just for inifinity arithmetic +*> Specifies whether to test just for infinity arithmetic *> or whether to test for infinity and NaN arithmetic. *> = 0: Verify infinity arithmetic only. *> = 1: Verify infinity and NaN arithmetic. From ed72332590b8cacb4ad9e100d6b7ad6c260f890b Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 23 Oct 2022 15:42:57 +0200 Subject: [PATCH 30/90] Added LAPACK_?langb.o objects to the Makefile. --- LAPACKE/src/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/LAPACKE/src/Makefile b/LAPACKE/src/Makefile index fdd62eab21..65e0e557f7 100644 --- a/LAPACKE/src/Makefile +++ b/LAPACKE/src/Makefile @@ -356,6 +356,8 @@ lapacke_clacrm.o \ lapacke_clacrm_work.o \ lapacke_clag2z.o \ lapacke_clag2z_work.o \ +lapacke_clangb.o \ +lapacke_clangb_work.o \ lapacke_clange.o \ lapacke_clange_work.o \ lapacke_clanhe.o \ @@ -840,6 +842,8 @@ lapacke_dlag2s.o \ lapacke_dlag2s_work.o \ lapacke_dlamch.o \ lapacke_dlamch_work.o \ +lapacke_dlangb.o \ +lapacke_dlangb_work.o \ lapacke_dlange.o \ lapacke_dlange_work.o \ lapacke_dlansy.o \ @@ -1412,6 +1416,8 @@ lapacke_slag2d.o \ lapacke_slag2d_work.o \ lapacke_slamch.o \ lapacke_slamch_work.o \ +lapacke_slangb.o \ +lapacke_slangb_work.o \ lapacke_slange.o \ lapacke_slange_work.o \ lapacke_slansy.o \ @@ -2114,6 +2120,8 @@ lapacke_zlacrm.o \ lapacke_zlacrm_work.o \ lapacke_zlag2c.o \ lapacke_zlag2c_work.o \ +lapacke_zlangb.o \ +lapacke_zlangb_work.o \ lapacke_zlange.o \ lapacke_zlange_work.o \ lapacke_zlanhe.o \ From 9d4f2e0cc724df52889fedfe79942533441bfcbb Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Thu, 27 Oct 2022 17:26:11 +0200 Subject: [PATCH 31/90] Added NaN check for trapezoidal matrices and applied it to LAPACKE_?lantr --- LAPACKE/include/lapacke_utils.h | 14 +++ LAPACKE/src/lapacke_clantr.c | 6 +- LAPACKE/src/lapacke_dlantr.c | 2 +- LAPACKE/src/lapacke_slantr.c | 2 +- LAPACKE/src/lapacke_zlantr.c | 2 +- LAPACKE/utils/CMakeLists.txt | 88 ++++++++++------- LAPACKE/utils/Makefile | 4 + LAPACKE/utils/lapacke_ctz_nancheck.c | 142 +++++++++++++++++++++++++++ LAPACKE/utils/lapacke_dtz_nancheck.c | 141 ++++++++++++++++++++++++++ LAPACKE/utils/lapacke_stz_nancheck.c | 141 ++++++++++++++++++++++++++ LAPACKE/utils/lapacke_ztz_nancheck.c | 142 +++++++++++++++++++++++++++ 11 files changed, 641 insertions(+), 43 deletions(-) create mode 100644 LAPACKE/utils/lapacke_ctz_nancheck.c create mode 100644 LAPACKE/utils/lapacke_dtz_nancheck.c create mode 100644 LAPACKE/utils/lapacke_stz_nancheck.c create mode 100644 LAPACKE/utils/lapacke_ztz_nancheck.c diff --git a/LAPACKE/include/lapacke_utils.h b/LAPACKE/include/lapacke_utils.h index 95979928d2..9fe2ae7355 100644 --- a/LAPACKE/include/lapacke_utils.h +++ b/LAPACKE/include/lapacke_utils.h @@ -376,6 +376,10 @@ lapack_logical LAPACKE_ctr_nancheck( int matrix_layout, char uplo, char diag, lapack_int n, const lapack_complex_float *a, lapack_int lda ); +lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float *a, + lapack_int lda ); lapack_logical LAPACKE_dgb_nancheck( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, @@ -440,6 +444,9 @@ lapack_logical LAPACKE_dtr_nancheck( int matrix_layout, char uplo, char diag, lapack_int n, const double *a, lapack_int lda ); +lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const double *a, lapack_int lda ); lapack_logical LAPACKE_sgb_nancheck( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, @@ -504,6 +511,9 @@ lapack_logical LAPACKE_str_nancheck( int matrix_layout, char uplo, char diag, lapack_int n, const float *a, lapack_int lda ); +lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const float *a, lapack_int lda ); lapack_logical LAPACKE_zgb_nancheck( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, @@ -574,6 +584,10 @@ lapack_logical LAPACKE_ztr_nancheck( int matrix_layout, char uplo, char diag, lapack_int n, const lapack_complex_double *a, lapack_int lda ); +lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double *a, + lapack_int lda ); #ifdef __cplusplus } diff --git a/LAPACKE/src/lapacke_clantr.c b/LAPACKE/src/lapacke_clantr.c index 88e765f2b8..e00b6c5788 100644 --- a/LAPACKE/src/lapacke_clantr.c +++ b/LAPACKE/src/lapacke_clantr.c @@ -33,8 +33,8 @@ #include "lapacke_utils.h" float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag, - lapack_int m, lapack_int n, const lapack_complex_float* a, - lapack_int lda ) + lapack_int m, lapack_int n, const lapack_complex_float* a, + lapack_int lda ) { lapack_int info = 0; float res = 0.; @@ -46,7 +46,7 @@ float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag, #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + if( LAPACKE_ctz_nancheck( matrix_layout, 'f', uplo, diag, m, n, a, lda ) ) { return -7; } } diff --git a/LAPACKE/src/lapacke_dlantr.c b/LAPACKE/src/lapacke_dlantr.c index 4d1be93d73..b20af0eb46 100644 --- a/LAPACKE/src/lapacke_dlantr.c +++ b/LAPACKE/src/lapacke_dlantr.c @@ -46,7 +46,7 @@ double LAPACKE_dlantr( int matrix_layout, char norm, char uplo, char diag, #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + if( LAPACKE_dtz_nancheck( matrix_layout, 'f', uplo, diag, m, n, a, lda ) ) { return -7; } } diff --git a/LAPACKE/src/lapacke_slantr.c b/LAPACKE/src/lapacke_slantr.c index 2f4c65889a..e2f67cfd6c 100644 --- a/LAPACKE/src/lapacke_slantr.c +++ b/LAPACKE/src/lapacke_slantr.c @@ -46,7 +46,7 @@ float LAPACKE_slantr( int matrix_layout, char norm, char uplo, char diag, #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + if( LAPACKE_stz_nancheck( matrix_layout, 'f', uplo, diag, m, n, a, lda ) ) { return -7; } } diff --git a/LAPACKE/src/lapacke_zlantr.c b/LAPACKE/src/lapacke_zlantr.c index f6656d84da..4c078b9b0f 100644 --- a/LAPACKE/src/lapacke_zlantr.c +++ b/LAPACKE/src/lapacke_zlantr.c @@ -46,7 +46,7 @@ double LAPACKE_zlantr( int matrix_layout, char norm, char uplo, char diag, #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + if( LAPACKE_ztz_nancheck( matrix_layout, 'f', uplo, diag, m, n, a, lda ) ) { return -7; } } diff --git a/LAPACKE/utils/CMakeLists.txt b/LAPACKE/utils/CMakeLists.txt index dd36ee33e7..b137f68e40 100644 --- a/LAPACKE/utils/CMakeLists.txt +++ b/LAPACKE/utils/CMakeLists.txt @@ -1,39 +1,53 @@ set(UTILS -lapacke_c_nancheck.c lapacke_ctr_trans.c lapacke_make_complex_float.c lapacke_zgb_nancheck.c -lapacke_cgb_nancheck.c lapacke_d_nancheck.c lapacke_s_nancheck.c lapacke_zgb_trans.c -lapacke_cgb_trans.c lapacke_dgb_nancheck.c lapacke_sgb_nancheck.c lapacke_zge_nancheck.c -lapacke_cge_nancheck.c lapacke_dgb_trans.c lapacke_sgb_trans.c lapacke_zge_trans.c -lapacke_cge_trans.c lapacke_dge_nancheck.c lapacke_sge_nancheck.c lapacke_zgg_nancheck.c -lapacke_cgg_nancheck.c lapacke_dge_trans.c lapacke_sge_trans.c lapacke_zgg_trans.c -lapacke_cgg_trans.c lapacke_dgg_nancheck.c lapacke_sgg_nancheck.c lapacke_zgt_nancheck.c -lapacke_cgt_nancheck.c lapacke_dgg_trans.c lapacke_sgg_trans.c lapacke_zhb_nancheck.c -lapacke_chb_nancheck.c lapacke_dgt_nancheck.c lapacke_sgt_nancheck.c lapacke_zhb_trans.c -lapacke_chb_trans.c lapacke_dhs_nancheck.c lapacke_shs_nancheck.c lapacke_zhe_nancheck.c -lapacke_che_nancheck.c lapacke_dhs_trans.c lapacke_shs_trans.c lapacke_zhe_trans.c -lapacke_che_trans.c lapacke_dpb_nancheck.c lapacke_spb_nancheck.c lapacke_zhp_nancheck.c -lapacke_chp_nancheck.c lapacke_dpb_trans.c lapacke_spb_trans.c lapacke_zhp_trans.c -lapacke_chp_trans.c lapacke_dpf_nancheck.c lapacke_spf_nancheck.c lapacke_zhs_nancheck.c -lapacke_chs_nancheck.c lapacke_dpf_trans.c lapacke_spf_trans.c lapacke_zhs_trans.c -lapacke_chs_trans.c lapacke_dpo_nancheck.c lapacke_spo_nancheck.c lapacke_zpb_nancheck.c -lapacke_cpb_nancheck.c lapacke_dpo_trans.c lapacke_spo_trans.c lapacke_zpb_trans.c -lapacke_cpb_trans.c lapacke_dpp_nancheck.c lapacke_spp_nancheck.c lapacke_zpf_nancheck.c -lapacke_cpf_nancheck.c lapacke_dpp_trans.c lapacke_spp_trans.c lapacke_zpf_trans.c -lapacke_cpf_trans.c lapacke_dpt_nancheck.c lapacke_spt_nancheck.c lapacke_zpo_nancheck.c -lapacke_cpo_nancheck.c lapacke_dsb_nancheck.c lapacke_ssb_nancheck.c lapacke_zpo_trans.c -lapacke_cpo_trans.c lapacke_dsb_trans.c lapacke_ssb_trans.c lapacke_zpp_nancheck.c -lapacke_cpp_nancheck.c lapacke_dsp_nancheck.c lapacke_ssp_nancheck.c lapacke_zpp_trans.c -lapacke_cpp_trans.c lapacke_dsp_trans.c lapacke_ssp_trans.c lapacke_zpt_nancheck.c -lapacke_cpt_nancheck.c lapacke_dst_nancheck.c lapacke_sst_nancheck.c lapacke_zsp_nancheck.c -lapacke_csp_nancheck.c lapacke_dsy_nancheck.c lapacke_ssy_nancheck.c lapacke_zsp_trans.c -lapacke_csp_trans.c lapacke_dsy_trans.c lapacke_ssy_trans.c lapacke_zst_nancheck.c -lapacke_cst_nancheck.c lapacke_dtb_nancheck.c lapacke_stb_nancheck.c lapacke_zsy_nancheck.c -lapacke_csy_nancheck.c lapacke_dtb_trans.c lapacke_stb_trans.c lapacke_zsy_trans.c -lapacke_csy_trans.c lapacke_dtf_nancheck.c lapacke_stf_nancheck.c lapacke_ztb_nancheck.c -lapacke_ctb_nancheck.c lapacke_dtf_trans.c lapacke_stf_trans.c lapacke_ztb_trans.c -lapacke_ctb_trans.c lapacke_dtp_nancheck.c lapacke_stp_nancheck.c lapacke_ztf_nancheck.c -lapacke_ctf_nancheck.c lapacke_dtp_trans.c lapacke_stp_trans.c lapacke_ztf_trans.c -lapacke_ctf_trans.c lapacke_dtr_nancheck.c lapacke_str_nancheck.c lapacke_ztp_nancheck.c -lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c -lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c -lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c +lapacke_c_nancheck.c lapacke_d_nancheck.c lapacke_s_nancheck.c lapacke_z_nancheck.c +lapacke_cgb_nancheck.c lapacke_dgb_nancheck.c lapacke_sgb_nancheck.c lapacke_zgb_trans.c +lapacke_cgb_trans.c lapacke_dgb_trans.c lapacke_sgb_trans.c lapacke_zgb_nancheck.c +lapacke_cge_nancheck.c lapacke_dge_nancheck.c lapacke_sge_nancheck.c lapacke_zge_nancheck.c +lapacke_cge_trans.c lapacke_dge_trans.c lapacke_sge_trans.c lapacke_zge_trans.c +lapacke_cgg_nancheck.c lapacke_dgg_nancheck.c lapacke_sgg_nancheck.c lapacke_zgg_nancheck.c +lapacke_cgg_trans.c lapacke_dgg_trans.c lapacke_sgg_trans.c lapacke_zgg_trans.c +lapacke_cgt_nancheck.c lapacke_dgt_nancheck.c lapacke_sgt_nancheck.c lapacke_zgt_nancheck.c +lapacke_chb_nancheck.c lapacke_dsb_nancheck.c lapacke_ssb_nancheck.c lapacke_zhb_nancheck.c +lapacke_chb_trans.c lapacke_dsb_trans.c lapacke_ssb_trans.c lapacke_zhb_trans.c +lapacke_che_nancheck.c lapacke_zhe_nancheck.c +lapacke_che_trans.c lapacke_zhe_trans.c +lapacke_chp_nancheck.c lapacke_zhp_nancheck.c +lapacke_chp_trans.c lapacke_zhp_trans.c +lapacke_chs_nancheck.c lapacke_dhs_nancheck.c lapacke_shs_nancheck.c lapacke_zhs_nancheck.c +lapacke_chs_trans.c lapacke_dhs_trans.c lapacke_shs_trans.c lapacke_zhs_trans.c +lapacke_cpb_nancheck.c lapacke_dpb_nancheck.c lapacke_spb_nancheck.c lapacke_zpb_nancheck.c +lapacke_cpb_trans.c lapacke_dpb_trans.c lapacke_spb_trans.c lapacke_zpb_trans.c +lapacke_cpf_nancheck.c lapacke_dpf_nancheck.c lapacke_spf_nancheck.c lapacke_zpf_nancheck.c +lapacke_cpf_trans.c lapacke_dpf_trans.c lapacke_spf_trans.c lapacke_zpf_trans.c +lapacke_cpo_nancheck.c lapacke_dpo_nancheck.c lapacke_spo_nancheck.c lapacke_zpo_nancheck.c +lapacke_cpo_trans.c lapacke_dpo_trans.c lapacke_spo_trans.c lapacke_zpo_trans.c +lapacke_cpp_nancheck.c lapacke_dpp_nancheck.c lapacke_spp_nancheck.c lapacke_zpp_nancheck.c +lapacke_cpp_trans.c lapacke_dpp_trans.c lapacke_spp_trans.c lapacke_zpp_trans.c +lapacke_cpt_nancheck.c lapacke_dpt_nancheck.c lapacke_spt_nancheck.c lapacke_zpt_nancheck.c +lapacke_csp_nancheck.c lapacke_dsp_nancheck.c lapacke_ssp_nancheck.c lapacke_zsp_nancheck.c +lapacke_csp_trans.c lapacke_dsp_trans.c lapacke_ssp_trans.c lapacke_zsp_trans.c +lapacke_cst_nancheck.c lapacke_dst_nancheck.c lapacke_sst_nancheck.c lapacke_zst_nancheck.c +lapacke_csy_nancheck.c lapacke_dsy_nancheck.c lapacke_ssy_nancheck.c lapacke_zsy_nancheck.c +lapacke_csy_trans.c lapacke_dsy_trans.c lapacke_ssy_trans.c lapacke_zsy_trans.c +lapacke_ctb_nancheck.c lapacke_dtb_nancheck.c lapacke_stb_nancheck.c lapacke_ztb_nancheck.c +lapacke_ctb_trans.c lapacke_dtb_trans.c lapacke_stb_trans.c lapacke_ztb_trans.c +lapacke_ctf_nancheck.c lapacke_dtf_nancheck.c lapacke_stf_nancheck.c lapacke_ztf_nancheck.c +lapacke_ctf_trans.c lapacke_dtf_trans.c lapacke_stf_trans.c lapacke_ztf_trans.c +lapacke_ctp_nancheck.c lapacke_dtp_nancheck.c lapacke_stp_nancheck.c lapacke_ztp_nancheck.c +lapacke_ctp_trans.c lapacke_dtp_trans.c lapacke_stp_trans.c lapacke_ztp_trans.c +lapacke_ctr_nancheck.c lapacke_dtr_nancheck.c lapacke_str_nancheck.c lapacke_ztr_nancheck.c +lapacke_ctr_trans.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztr_trans.c +lapacke_ctz_nancheck.c lapacke_dtz_nancheck.c lapacke_stz_nancheck.c lapacke_ztz_nancheck.c + +lapacke_make_complex_float.c lapacke_make_complex_double.c +lapacke_lsame.c +lapacke_xerbla.c ) + + + + + + + + diff --git a/LAPACKE/utils/Makefile b/LAPACKE/utils/Makefile index adc5736507..3451906b12 100644 --- a/LAPACKE/utils/Makefile +++ b/LAPACKE/utils/Makefile @@ -76,6 +76,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_ctp_trans.o \ lapacke_ctr_nancheck.o \ lapacke_ctr_trans.o \ + lapacke_ctz_nancheck.o \ lapacke_dgb_nancheck.o \ lapacke_dgb_trans.o \ lapacke_dge_nancheck.o \ @@ -110,6 +111,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_dtp_trans.o \ lapacke_dtr_nancheck.o \ lapacke_dtr_trans.o \ + lapacke_dtz_nancheck.o \ lapacke_lsame.o \ lapacke_sgb_nancheck.o \ lapacke_sgb_trans.o \ @@ -145,6 +147,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_stp_trans.o \ lapacke_str_nancheck.o \ lapacke_str_trans.o \ + lapacke_stz_nancheck.o \ lapacke_xerbla.o \ lapacke_zgb_nancheck.o \ lapacke_zgb_trans.o \ @@ -184,6 +187,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_ztp_trans.o \ lapacke_ztr_nancheck.o \ lapacke_ztr_trans.o \ + lapacke_ztz_nancheck.o \ lapacke_make_complex_float.o \ lapacke_make_complex_double.o diff --git a/LAPACKE/utils/lapacke_ctz_nancheck.c b/LAPACKE/utils/lapacke_ctz_nancheck.c new file mode 100644 index 0000000000..baba1a83f1 --- /dev/null +++ b/LAPACKE/utils/lapacke_ctz_nancheck.c @@ -0,0 +1,142 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Check a trapezoidal matrix for NaN entries. The shape of the trapezoidal + matrix is determined by the arguments `direct` and `uplo`. `Direct` chooses + the diagonal which shall be considered and `uplo` tells us whether we use the + upper or lower part of the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float *a, + lapack_int lda ) +{ + lapack_logical colmaj, front, lower, unit; + + if( a == NULL ) return (lapack_logical) 0; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( uplo, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return (lapack_logical) 0; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_offset = -1; + lapack_int rect_m = (m > n) ? m - n : m; + lapack_int rect_n = (n > m) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n) { + rect_offset = tri_n * (!colmaj ? lda : 1); + } else if( !lower && n > m) { + rect_offset = tri_n * (colmaj ? lda : 1); + } + } else { + if( m > n) { + tri_offset = rect_m * (!colmaj ? lda : 1); + if( !lower ) { + rect_offset = 0; + } + } else if( n > m) { + tri_offset = rect_n * (colmaj ? lda : 1); + if( lower ) { + rect_offset = 0; + } + } + } + + /* Check rectangular part */ + if( rect_offset >= 0 ) { + if( LAPACKE_cge_nancheck(matrix_layout, rect_m, rect_n, &a[rect_offset], lda) ) { + return (lapack_logical) 1; + } + } + + /* Check triangular part */ + return LAPACKE_ctr_nancheck(matrix_layout, uplo, diag, tri_n, &a[tri_offset], lda); +} diff --git a/LAPACKE/utils/lapacke_dtz_nancheck.c b/LAPACKE/utils/lapacke_dtz_nancheck.c new file mode 100644 index 0000000000..53370eac35 --- /dev/null +++ b/LAPACKE/utils/lapacke_dtz_nancheck.c @@ -0,0 +1,141 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Check a trapezoidal matrix for NaN entries. The shape of the trapezoidal + matrix is determined by the arguments `direct` and `uplo`. `Direct` chooses + the diagonal which shall be considered and `uplo` tells us whether we use the + upper or lower part of the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const double *a, lapack_int lda ) +{ + lapack_logical colmaj, front, lower, unit; + + if( a == NULL ) return (lapack_logical) 0; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( uplo, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return (lapack_logical) 0; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_offset = -1; + lapack_int rect_m = (m > n) ? m - n : m; + lapack_int rect_n = (n > m) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n) { + rect_offset = tri_n * (!colmaj ? lda : 1); + } else if( !lower && n > m) { + rect_offset = tri_n * (colmaj ? lda : 1); + } + } else { + if( m > n) { + tri_offset = rect_m * (!colmaj ? lda : 1); + if( !lower ) { + rect_offset = 0; + } + } else if( n > m) { + tri_offset = rect_n * (colmaj ? lda : 1); + if( lower ) { + rect_offset = 0; + } + } + } + + /* Check rectangular part */ + if( rect_offset >= 0 ) { + if( LAPACKE_dge_nancheck(matrix_layout, rect_m, rect_n, &a[rect_offset], lda) ) { + return (lapack_logical) 1; + } + } + + /* Check triangular part */ + return LAPACKE_dtr_nancheck(matrix_layout, uplo, diag, tri_n, &a[tri_offset], lda); +} diff --git a/LAPACKE/utils/lapacke_stz_nancheck.c b/LAPACKE/utils/lapacke_stz_nancheck.c new file mode 100644 index 0000000000..ae46fb0a50 --- /dev/null +++ b/LAPACKE/utils/lapacke_stz_nancheck.c @@ -0,0 +1,141 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Check a trapezoidal matrix for NaN entries. The shape of the trapezoidal + matrix is determined by the arguments `direct` and `uplo`. `Direct` chooses + the diagonal which shall be considered and `uplo` tells us whether we use the + upper or lower part of the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const float *a, lapack_int lda ) +{ + lapack_logical colmaj, front, lower, unit; + + if( a == NULL ) return (lapack_logical) 0; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( uplo, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return (lapack_logical) 0; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_offset = -1; + lapack_int rect_m = (m > n) ? m - n : m; + lapack_int rect_n = (n > m) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n) { + rect_offset = tri_n * (!colmaj ? lda : 1); + } else if( !lower && n > m) { + rect_offset = tri_n * (colmaj ? lda : 1); + } + } else { + if( m > n) { + tri_offset = rect_m * (!colmaj ? lda : 1); + if( !lower ) { + rect_offset = 0; + } + } else if( n > m) { + tri_offset = rect_n * (colmaj ? lda : 1); + if( lower ) { + rect_offset = 0; + } + } + } + + /* Check rectangular part */ + if( rect_offset >= 0 ) { + if( LAPACKE_sge_nancheck(matrix_layout, rect_m, rect_n, &a[rect_offset], lda) ) { + return (lapack_logical) 1; + } + } + + /* Check triangular part */ + return LAPACKE_str_nancheck(matrix_layout, uplo, diag, tri_n, &a[tri_offset], lda); +} diff --git a/LAPACKE/utils/lapacke_ztz_nancheck.c b/LAPACKE/utils/lapacke_ztz_nancheck.c new file mode 100644 index 0000000000..84b8fb62da --- /dev/null +++ b/LAPACKE/utils/lapacke_ztz_nancheck.c @@ -0,0 +1,142 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Check a trapezoidal matrix for NaN entries. The shape of the trapezoidal + matrix is determined by the arguments `direct` and `uplo`. `Direct` chooses + the diagonal which shall be considered and `uplo` tells us whether we use the + upper or lower part of the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double *a, + lapack_int lda ) +{ + lapack_logical colmaj, front, lower, unit; + + if( a == NULL ) return (lapack_logical) 0; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( uplo, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return (lapack_logical) 0; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_offset = -1; + lapack_int rect_m = (m > n) ? m - n : m; + lapack_int rect_n = (n > m) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n) { + rect_offset = tri_n * (!colmaj ? lda : 1); + } else if( !lower && n > m) { + rect_offset = tri_n * (colmaj ? lda : 1); + } + } else { + if( m > n) { + tri_offset = rect_m * (!colmaj ? lda : 1); + if( !lower ) { + rect_offset = 0; + } + } else if( n > m) { + tri_offset = rect_n * (colmaj ? lda : 1); + if( lower ) { + rect_offset = 0; + } + } + } + + /* Check rectangular part */ + if( rect_offset >= 0 ) { + if( LAPACKE_zge_nancheck(matrix_layout, rect_m, rect_n, &a[rect_offset], lda) ) { + return (lapack_logical) 1; + } + } + + /* Check triangular part */ + return LAPACKE_ztr_nancheck(matrix_layout, uplo, diag, tri_n, &a[tri_offset], lda); +} From 0b36d0167902b070c215b660b995e2278dfe9179 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Thu, 27 Oct 2022 17:34:42 +0200 Subject: [PATCH 32/90] Formatting. --- LAPACKE/utils/lapacke_ctz_nancheck.c | 6 ++++-- LAPACKE/utils/lapacke_dtz_nancheck.c | 6 ++++-- LAPACKE/utils/lapacke_stz_nancheck.c | 6 ++++-- LAPACKE/utils/lapacke_ztz_nancheck.c | 6 ++++-- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/LAPACKE/utils/lapacke_ctz_nancheck.c b/LAPACKE/utils/lapacke_ctz_nancheck.c index baba1a83f1..7e912715c7 100644 --- a/LAPACKE/utils/lapacke_ctz_nancheck.c +++ b/LAPACKE/utils/lapacke_ctz_nancheck.c @@ -132,11 +132,13 @@ lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, /* Check rectangular part */ if( rect_offset >= 0 ) { - if( LAPACKE_cge_nancheck(matrix_layout, rect_m, rect_n, &a[rect_offset], lda) ) { + if( LAPACKE_cge_nancheck( matrix_layout, rect_m, rect_n, + &a[rect_offset], lda) ) { return (lapack_logical) 1; } } /* Check triangular part */ - return LAPACKE_ctr_nancheck(matrix_layout, uplo, diag, tri_n, &a[tri_offset], lda); + return LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, tri_n, + &a[tri_offset], lda ); } diff --git a/LAPACKE/utils/lapacke_dtz_nancheck.c b/LAPACKE/utils/lapacke_dtz_nancheck.c index 53370eac35..46e9099da0 100644 --- a/LAPACKE/utils/lapacke_dtz_nancheck.c +++ b/LAPACKE/utils/lapacke_dtz_nancheck.c @@ -131,11 +131,13 @@ lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, /* Check rectangular part */ if( rect_offset >= 0 ) { - if( LAPACKE_dge_nancheck(matrix_layout, rect_m, rect_n, &a[rect_offset], lda) ) { + if( LAPACKE_dge_nancheck( matrix_layout, rect_m, rect_n, + &a[rect_offset], lda) ) { return (lapack_logical) 1; } } /* Check triangular part */ - return LAPACKE_dtr_nancheck(matrix_layout, uplo, diag, tri_n, &a[tri_offset], lda); + return LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, tri_n, + &a[tri_offset], lda ); } diff --git a/LAPACKE/utils/lapacke_stz_nancheck.c b/LAPACKE/utils/lapacke_stz_nancheck.c index ae46fb0a50..df9768586e 100644 --- a/LAPACKE/utils/lapacke_stz_nancheck.c +++ b/LAPACKE/utils/lapacke_stz_nancheck.c @@ -131,11 +131,13 @@ lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, /* Check rectangular part */ if( rect_offset >= 0 ) { - if( LAPACKE_sge_nancheck(matrix_layout, rect_m, rect_n, &a[rect_offset], lda) ) { + if( LAPACKE_sge_nancheck( matrix_layout, rect_m, rect_n, + &a[rect_offset], lda) ) { return (lapack_logical) 1; } } /* Check triangular part */ - return LAPACKE_str_nancheck(matrix_layout, uplo, diag, tri_n, &a[tri_offset], lda); + return LAPACKE_str_nancheck( matrix_layout, uplo, diag, tri_n, + &a[tri_offset], lda ); } diff --git a/LAPACKE/utils/lapacke_ztz_nancheck.c b/LAPACKE/utils/lapacke_ztz_nancheck.c index 84b8fb62da..9869e7aa6f 100644 --- a/LAPACKE/utils/lapacke_ztz_nancheck.c +++ b/LAPACKE/utils/lapacke_ztz_nancheck.c @@ -132,11 +132,13 @@ lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, /* Check rectangular part */ if( rect_offset >= 0 ) { - if( LAPACKE_zge_nancheck(matrix_layout, rect_m, rect_n, &a[rect_offset], lda) ) { + if( LAPACKE_zge_nancheck( matrix_layout, rect_m, rect_n, + &a[rect_offset], lda) ) { return (lapack_logical) 1; } } /* Check triangular part */ - return LAPACKE_ztr_nancheck(matrix_layout, uplo, diag, tri_n, &a[tri_offset], lda); + return LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, tri_n, + &a[tri_offset], lda ); } From 14ff62a81e72832737914aa99df0eb7f27902500 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Thu, 27 Oct 2022 17:54:04 +0200 Subject: [PATCH 33/90] Removed empty lines. --- LAPACKE/utils/CMakeLists.txt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/LAPACKE/utils/CMakeLists.txt b/LAPACKE/utils/CMakeLists.txt index b137f68e40..97ddfe5656 100644 --- a/LAPACKE/utils/CMakeLists.txt +++ b/LAPACKE/utils/CMakeLists.txt @@ -43,11 +43,3 @@ lapacke_make_complex_float.c lapacke_lsame.c lapacke_xerbla.c ) - - - - - - - - From bcfdb93aed19c360c933292a32e9b10d00388803 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Thu, 27 Oct 2022 17:56:17 +0200 Subject: [PATCH 34/90] Removed tabs. --- LAPACKE/utils/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LAPACKE/utils/Makefile b/LAPACKE/utils/Makefile index 3451906b12..d8f4dac96e 100644 --- a/LAPACKE/utils/Makefile +++ b/LAPACKE/utils/Makefile @@ -76,7 +76,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_ctp_trans.o \ lapacke_ctr_nancheck.o \ lapacke_ctr_trans.o \ - lapacke_ctz_nancheck.o \ + lapacke_ctz_nancheck.o \ lapacke_dgb_nancheck.o \ lapacke_dgb_trans.o \ lapacke_dge_nancheck.o \ From b8f586bcc4143b7f995e590ba345b2148425b9c3 Mon Sep 17 00:00:00 2001 From: scr2016 Date: Sat, 29 Oct 2022 02:47:52 -0700 Subject: [PATCH 35/90] added a new routines [S,D,C,Z]GELST and test code for the least squares algorithm that solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q inside the routine. *GELST is similar to *GELS, but unlike *GELS does not compute triangular blocks twice, i.e. runs faster. modified: SRC/Makefile new file: SRC/cgelst.f new file: SRC/dgelst.f new file: SRC/sgelst.f new file: SRC/zgelst.f modified: TESTING/LIN/alahd.f modified: TESTING/LIN/cdrvls.f modified: TESTING/LIN/cerrls.f modified: TESTING/LIN/ddrvls.f modified: TESTING/LIN/derrls.f modified: TESTING/LIN/sdrvls.f modified: TESTING/LIN/serrls.f modified: TESTING/LIN/zdrvls.f --- SRC/CMakeLists.txt | 8 +- SRC/Makefile | 8 +- SRC/cgelst.f | 533 +++++++++++++++++++++++++++++++++++++++++++ SRC/dgelst.f | 531 ++++++++++++++++++++++++++++++++++++++++++ SRC/sgelst.f | 531 ++++++++++++++++++++++++++++++++++++++++++ SRC/zgelst.f | 533 +++++++++++++++++++++++++++++++++++++++++++ TESTING/LIN/alahd.f | 22 +- TESTING/LIN/cdrvls.f | 320 +++++++++++++++++++------- TESTING/LIN/cerrls.f | 61 ++++- TESTING/LIN/ddrvls.f | 339 ++++++++++++++++++++------- TESTING/LIN/derrls.f | 61 ++++- TESTING/LIN/sdrvls.f | 333 ++++++++++++++++++++------- TESTING/LIN/serrls.f | 61 ++++- TESTING/LIN/zdrvls.f | 333 ++++++++++++++++++++------- 14 files changed, 3311 insertions(+), 363 deletions(-) create mode 100644 SRC/cgelst.f create mode 100644 SRC/dgelst.f create mode 100644 SRC/sgelst.f create mode 100644 SRC/zgelst.f diff --git a/SRC/CMakeLists.txt b/SRC/CMakeLists.txt index d324d94116..5d2e072584 100644 --- a/SRC/CMakeLists.txt +++ b/SRC/CMakeLists.txt @@ -86,7 +86,7 @@ set(SLASRC sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f - sgels.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f + sgels.f sgelst.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f sgeqp3.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesv.f sgesvd.f sgesvdx.f sgesvx.f sgetc2.f sgetf2.f sgetri.f @@ -177,7 +177,7 @@ set(CLASRC cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f - cgels.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f + cgels.f cgelst.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesv.f cgesvd.f cgesvdx.f cgesvj.f cgejsv.f cgsvj0.f cgsvj1.f @@ -286,7 +286,7 @@ set(DLASRC dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f - dgels.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f + dgels.f dgelst.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f dgeqp3.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesv.f dgesvd.f dgesvdx.f dgesvx.f dgetc2.f dgetf2.f dgetrf.f dgetrf2.f dgetri.f @@ -375,7 +375,7 @@ set(ZLASRC zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f - zgels.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f + zgels.f zgelst.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f zgesc2.f zgesdd.f zgesv.f zgesvd.f zgesvdx.f zgesvx.f zgesvj.f zgejsv.f zgsvj0.f zgsvj1.f diff --git a/SRC/Makefile b/SRC/Makefile index 765abf42ac..35b8c64aea 100644 --- a/SRC/Makefile +++ b/SRC/Makefile @@ -118,7 +118,7 @@ SLASRC = \ sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \ sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \ sgehd2.o sgehrd.o sgelq2.o sgelqf.o \ - sgels.o sgelsd.o sgelss.o sgelsy.o sgeql2.o sgeqlf.o \ + sgels.o sgelst.o sgelsd.o sgelss.o sgelsy.o sgeql2.o sgeqlf.o \ sgeqp3.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \ sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvdx.o sgesvx.o \ sgetc2.o sgetf2.o sgetri.o \ @@ -211,7 +211,7 @@ CLASRC = \ cgbtf2.o cgbtrf.o cgbtrs.o cgebak.o cgebal.o cgebd2.o cgebrd.o \ cgecon.o cgeequ.o cgees.o cgeesx.o cgeev.o cgeevx.o \ cgehd2.o cgehrd.o cgelq2.o cgelqf.o \ - cgels.o cgelsd.o cgelss.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \ + cgels.o cgelst.o cgelsd.o cgelss.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \ cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \ cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o cgesvdx.o \ cgesvj.o cgejsv.o cgsvj0.o cgsvj1.o \ @@ -320,7 +320,7 @@ DLASRC = \ dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \ dgehd2.o dgehrd.o dgelq2.o dgelqf.o \ - dgels.o dgelsd.o dgelss.o dgelsy.o dgeql2.o dgeqlf.o \ + dgels.o dgelst.o dgelsd.o dgelss.o dgelsy.o dgeql2.o dgeqlf.o \ dgeqp3.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \ dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvdx.o dgesvx.o \ dgetc2.o dgetf2.o dgetrf.o dgetri.o \ @@ -412,7 +412,7 @@ ZLASRC = \ zgbtf2.o zgbtrf.o zgbtrs.o zgebak.o zgebal.o zgebd2.o zgebrd.o \ zgecon.o zgeequ.o zgees.o zgeesx.o zgeev.o zgeevx.o \ zgehd2.o zgehrd.o zgelq2.o zgelqf.o \ - zgels.o zgelsd.o zgelss.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \ + zgels.o zgelst.o zgelsd.o zgelss.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \ zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \ zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvdx.o \ zgesvj.o zgejsv.o zgsvj0.o zgsvj1.o \ diff --git a/SRC/cgelst.f b/SRC/cgelst.f new file mode 100644 index 0000000000..26dda1c0ca --- /dev/null +++ b/SRC/cgelst.f @@ -0,0 +1,533 @@ +*> \brief CGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representaion of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its conjugate-transpose, using a QR +*> or LQ factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by CGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by CGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> modulus of elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of the modulus of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complexGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, CLANGE +* .. +* .. External Subroutines .. + EXTERNAL CGELQT, CGEQRT, CGEMLQT, CGEMQRT, SLABAD, + $ CLASCL, CLASET, CTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'CGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = REAL( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'CGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL CGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMQRT( 'Left', 'Conjugate transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL CGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMLQT( 'Left', 'Conjugate transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ M, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = REAL( LWOPT ) +* + RETURN +* +* End of CGELST +* + END diff --git a/SRC/dgelst.f b/SRC/dgelst.f new file mode 100644 index 0000000000..80957d5113 --- /dev/null +++ b/SRC/dgelst.f @@ -0,0 +1,531 @@ +*> \brief DGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representaion of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its transpose, using a QR or LQ +*> factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by DGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by DGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DGELQT, DGEQRT, DGEMLQT, DGEMQRT, DLABAD, + $ DLASCL, DLASET, DTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'DGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = DBLE( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'DGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = DLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL DGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMQRT( 'Left', 'Transpose', M, NRHS, N, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, WORK( MN*NB+1 ), + $ INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL DGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMLQT( 'Left', 'Transpose', N, NRHS, M, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = DBLE( LWOPT ) +* + RETURN +* +* End of DGELST +* + END diff --git a/SRC/sgelst.f b/SRC/sgelst.f new file mode 100644 index 0000000000..5e27e37e38 --- /dev/null +++ b/SRC/sgelst.f @@ -0,0 +1,531 @@ +*> \brief SGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representaion of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its transpose, using a QR or LQ +*> factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by SGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by SGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup realGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SGELQT, SGEQRT, SGEMLQT, SGEMQRT, SLABAD, + $ SLASCL, SLASET, STRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'SGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = REAL( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'SGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = SLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL SGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMQRT( 'Left', 'Transpose', M, NRHS, N, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, WORK( MN*NB+1 ), + $ INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL STRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL STRTRS( 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL SGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMLQT( 'Left', 'Transpose', N, NRHS, M, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = REAL( LWOPT ) +* + RETURN +* +* End of SGELST +* + END diff --git a/SRC/zgelst.f b/SRC/zgelst.f new file mode 100644 index 0000000000..35b73dc454 --- /dev/null +++ b/SRC/zgelst.f @@ -0,0 +1,533 @@ +*> \brief ZGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representaion of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its conjugate-transpose, using a QR +*> or LQ factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by ZGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by ZGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> modulus of elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of the modulus of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex16GEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL ZGELQT, ZGEQRT, ZGEMLQT, ZGEMQRT, DLABAD, + $ ZLASCL, ZLASET, ZTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'ZGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = DBLE( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'ZGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL ZGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMQRT( 'Left', 'Conjugate transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL ZGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMLQT( 'Left', 'Conjugate transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ M, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = DBLE( LWOPT ) +* + RETURN +* +* End of ZGELST +* + END diff --git a/TESTING/LIN/alahd.f b/TESTING/LIN/alahd.f index 2cc0fba063..f0423a23b9 100644 --- a/TESTING/LIN/alahd.f +++ b/TESTING/LIN/alahd.f @@ -608,17 +608,18 @@ SUBROUTINE ALAHD( IOUNIT, PATH ) ELSE IF( LSAMEN( 2, P2, 'LS' ) ) THEN * * LS: Least Squares driver routines for -* LS, LSD, LSS, LSX and LSY. +* LS, LST, TSLS, LSD, LSS, LSX and LSY. * WRITE( IOUNIT, FMT = 9984 )PATH WRITE( IOUNIT, FMT = 9967 ) - WRITE( IOUNIT, FMT = 9921 )C1, C1, C1, C1 + WRITE( IOUNIT, FMT = 9921 )C1, C1, C1, C1, C1, C1 WRITE( IOUNIT, FMT = 9935 )1 WRITE( IOUNIT, FMT = 9931 )2 - WRITE( IOUNIT, FMT = 9933 )3 - WRITE( IOUNIT, FMT = 9935 )4 - WRITE( IOUNIT, FMT = 9934 )5 - WRITE( IOUNIT, FMT = 9932 )6 + WRITE( IOUNIT, FMT = 9919 ) + WRITE( IOUNIT, FMT = 9933 )7 + WRITE( IOUNIT, FMT = 9935 )8 + WRITE( IOUNIT, FMT = 9934 )9 + WRITE( IOUNIT, FMT = 9932 )10 WRITE( IOUNIT, FMT = 9920 ) WRITE( IOUNIT, FMT = '( '' Messages:'' )' ) * @@ -1048,10 +1049,11 @@ SUBROUTINE ALAHD( IOUNIT, PATH ) $ 'check if X is in the row space of A or A'' ', $ '(overdetermined case)' ) 9929 FORMAT( ' Test ratios (1-3: ', A1, 'TZRZF):' ) - 9920 FORMAT( 3X, ' 7-10: same as 3-6', 3X, ' 11-14: same as 3-6' ) - 9921 FORMAT( ' Test ratios:', / ' (1-2: ', A1, 'GELS, 3-6: ', A1, - $ 'GELSY, 7-10: ', A1, 'GELSS, 11-14: ', A1, 'GELSD, 15-16: ', - $ A1, 'GETSLS)') + 9919 FORMAT( 3X, ' 3-4: same as 1-2', 3X, ' 5-6: same as 1-2' ) + 9920 FORMAT( 3X, ' 11-14: same as 7-10', 3X, ' 15-18: same as 7-10' ) + 9921 FORMAT( ' Test ratios:', / ' (1-2: ', A1, 'GELS, 3-4: ', A1, + $ 'GELST, 5-6: ', A1, 'GETSLS, 7-10: ', A1, 'GELSY, 11-14: ', + $ A1, 'GETSS, 15-18: ', A1, 'GELSD)' ) 9928 FORMAT( 7X, 'where ALPHA = ( 1 + SQRT( 17 ) ) / 8' ) 9927 FORMAT( 3X, I2, ': ABS( Largest element in L )', / 12X, $ ' - ( 1 / ( 1 - ALPHA ) ) + THRESH' ) diff --git a/TESTING/LIN/cdrvls.f b/TESTING/LIN/cdrvls.f index 7fe189e5fd..ecba705d5f 100644 --- a/TESTING/LIN/cdrvls.f +++ b/TESTING/LIN/cdrvls.f @@ -31,7 +31,8 @@ *> *> \verbatim *> -*> CDRVLS tests the least squares driver routines CGELS, CGETSLS, CGELSS, CGELSY +*> CDRVLS tests the least squares driver routines CGELS, CGELST, +*> CGETSLS, CGELSS, CGELSY *> and CGELSD. *> \endverbatim * @@ -211,7 +212,7 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) REAL ONE, ZERO @@ -228,8 +229,8 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, LRWORK, - $ LWORK_CGELS, LWORK_CGETSLS, LWORK_CGELSS, - $ LWORK_CGELSY, LWORK_CGELSD, + $ LWORK_CGELS, LWORK_CGELST, LWORK_CGETSLS, + $ LWORK_CGELSS, LWORK_CGELSY, LWORK_CGELSD, $ LRWORK_CGELSY, LRWORK_CGELSS, LRWORK_CGELSD REAL EPS, NORMA, NORMB, RCOND * .. @@ -249,7 +250,7 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, CERRLS, CGELS, CGELSD, - $ CGELSS, CGELSY, CGEMM, CGETSLS, CLACPY, + $ CGELSS, CGELST, CGELSY, CGEMM, CGETSLS, CLACPY, $ CLARNV, CQRT13, CQRT15, CQRT16, CSSCAL, $ SAXPY, XLAENV * .. @@ -334,7 +335,8 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -361,6 +363,10 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL CGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_CGELS = INT( WQ( 1 ) ) +* Compute workspace needed for CGELST + CALL CGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_CGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for CGETSLS CALL CGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -425,21 +431,26 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 100 -* +* ===================================================== +* Begin test CGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test CGELS -* * Generate a matrix of scaling type ISCALE * CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -484,15 +495,20 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for CGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL CQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, RWORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for CGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN @@ -515,7 +531,7 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -524,26 +540,34 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test CGETSLS + END DO + END DO + END IF +* ===================================================== +* End test CGELS +* ===================================================== +* ===================================================== +* Begin test CGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) + CALL XLAENV( 3, NXVAL( INB ) ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -560,9 +584,9 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, IF( NCOLS.GT.0 ) THEN CALL CLARNV( 2, ISEED, NCOLS*NRHS, $ WORK ) - CALL CSCAL( NCOLS*NRHS, - $ CONE / REAL( NCOLS ), WORK, - $ 1 ) + CALL CSSCAL( NCOLS*NRHS, + $ ONE / REAL( NCOLS ), WORK, + $ 1 ) END IF CALL CGEMM( TRANS, 'No transpose', NROWS, $ NRHS, NCOLS, CONE, COPYA, LDA, @@ -578,31 +602,37 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'CGETSLS ' - CALL CGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'CGELST' + CALL CGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) +* IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'CGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'CGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for CGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL CQRT16( TRANS, M, N, NRHS, COPYA, - $ LDA, B, LDB, C, LDB, WORK2, - $ RESULT( 15 ) ) + $ LDA, B, LDB, C, LDB, RWORK, + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for CGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * * Solving LS system * - RESULT( 16 ) = CQRT17( TRANS, 1, M, N, + RESULT( 4 ) = CQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -610,7 +640,7 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * Solving overdetermined system * - RESULT( 16 ) = CQRT14( TRANS, M, N, + RESULT( 4 ) = CQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -618,21 +648,151 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 )TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO + END IF +* ===================================================== +* End test CGELST +* ===================================================== +* ===================================================== +* Begin test CGELSTSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO INB = 1, NNB + MB = NBVAL( INB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO IMB = 1, NNB + NB = NBVAL( IMB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'C' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL CLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL CSCAL( NCOLS*NRHS, + $ CONE / REAL( NCOLS ), + $ WORK, 1 ) + END IF + CALL CGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, CONE, + $ COPYA, LDA, WORK, LDWORK, + $ CZERO, B, LDB ) + CALL CLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL CLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL CLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'CGETSLS ' + CALL CGETSLS( TRANS, M, N, NRHS, A, + $ LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'CGETSLS ', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for CGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL CLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL CQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK2, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for CGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = CQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = CQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, B, + $ LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 )TRANS, + $ M, N, NRHS, MB, NB, ITYPE, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO END IF +* ===================================================== +* End test CGELSTSLS +* ==================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -680,37 +840,37 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * workspace used: 2*MNMIN+NB*NB+NB*MAX(N,NRHS) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = CQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = CQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK, RWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 4 ) ) + $ RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = CQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = CQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = CQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = CQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -736,38 +896,38 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 8 ) ) + $ RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = CQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = CQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = CQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = CQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -792,45 +952,45 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 12 ) ) + $ RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = CQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = CQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = CQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = CQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 80 K = 3, 14 + DO 80 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) diff --git a/TESTING/LIN/cerrls.f b/TESTING/LIN/cerrls.f index 48e44ad863..fca9439181 100644 --- a/TESTING/LIN/cerrls.f +++ b/TESTING/LIN/cerrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> CERRLS tests the error exits for the COMPLEX least squares -*> driver routines (CGELS, CGELSS, CGELSY, CGELSD). +*> driver routines (CGELS, CGELST, CGETSLS, CGELSS, CGELSY, CGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ SUBROUTINE CERRLS( PATH, NUNIT ) EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CGELS, CGELSD, CGELSS, CGELSY, CHKXER + EXTERNAL ALAESM, CHKXER, CGELS, CGELSD, CGELSS, CGELST, + $ CGELSY, CGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ SUBROUTINE CERRLS( PATH, NUNIT ) INFOT = 8 CALL CGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'CGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'CGELS ', INFOT, NOUT, LERR, OK ) * +* CGELST +* + SRNAMT = 'CGELST' + INFOT = 1 + CALL CGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) +* +* CGETSLS +* + SRNAMT = 'CGETSLS' + INFOT = 1 + CALL CGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) +* * CGELSS * SRNAMT = 'CGELSS' diff --git a/TESTING/LIN/ddrvls.f b/TESTING/LIN/ddrvls.f index b64930c10c..b3d07d67f2 100644 --- a/TESTING/LIN/ddrvls.f +++ b/TESTING/LIN/ddrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> DDRVLS tests the least squares driver routines DGELS, DGETSLS, DGELSS, DGELSY, -*> and DGELSD. +*> DDRVLS tests the least squares driver routines DGELS, DGELST, +*> DGETSLS, DGELSS, DGELSY, and DGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) DOUBLE PRECISION ONE, TWO, ZERO @@ -225,8 +225,8 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, - $ LWORK_DGELS, LWORK_DGETSLS, LWORK_DGELSS, - $ LWORK_DGELSY, LWORK_DGELSD + $ LWORK_DGELS, LWORK_DGELST, LWORK_DGETSLS, + $ LWORK_DGELSS, LWORK_DGELSY, LWORK_DGELSD DOUBLE PRECISION EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. @@ -243,12 +243,12 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, DERRLS, DGELS, - $ DGELSD, DGELSS, DGELSY, DGEMM, DLACPY, - $ DLARNV, DLASRT, DQRT13, DQRT15, DQRT16, DSCAL, - $ XLAENV + $ DGELSD, DGELSS, DGELST, DGELSY, DGEMM, + $ DGETSLS, DLACPY, DLARNV, DQRT13, DQRT15, + $ DQRT16, DSCAL, XLAENV * .. * .. Intrinsic Functions .. - INTRINSIC DBLE, INT, LOG, MAX, MIN, SQRT + INTRINSIC DBLE, INT, MAX, MIN, SQRT * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -330,7 +330,8 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -357,6 +358,10 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL DGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_DGELS = INT ( WQ ( 1 ) ) +* Compute workspace needed for DGELST + CALL DGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_DGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for DGETSLS CALL DGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -378,9 +383,9 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Compute LIWORK workspace needed for DGELSY and DGELSD LIWORK = MAX( LIWORK, N, IWQ( 1 ) ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_DGELS, LWORK_DGETSLS, - $ LWORK_DGELSY, LWORK_DGELSS, - $ LWORK_DGELSD ) + LWORK = MAX( LWORK, LWORK_DGELS, LWORK_DGELST, + $ LWORK_DGETSLS, LWORK_DGELSY, + $ LWORK_DGELSS, LWORK_DGELSD ) END IF ENDDO ENDDO @@ -411,21 +416,26 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 110 -* +* ===================================================== +* Begin test DGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test DGELS -* * Generate a matrix of scaling type ISCALE * CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -469,20 +479,27 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for DGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL DQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for DGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * RESULT( 2 ) = DQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, @@ -500,35 +517,42 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9999 )TRANS, M, + WRITE( NOUT, FMT = 9999 ) TRANS, M, $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test DGETSLS + END DO + END DO + END IF +* ===================================================== +* End test DGELS +* ===================================================== +* ===================================================== +* Begin test DGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -563,31 +587,38 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'DGETSLS ' - CALL DGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'DGELST' + CALL DGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'DGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'DGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for DGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL DQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, - $ RESULT( 15 ) ) + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for DGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * - RESULT( 16 ) = DQRT17( TRANS, 1, M, N, + RESULT( 4 ) = DQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -595,7 +626,7 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * Solving overdetermined system * - RESULT( 16 ) = DQRT14( TRANS, M, N, + RESULT( 4 ) = DQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -603,21 +634,151 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 ) TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO + END IF +* ===================================================== +* End test DGELST +* ===================================================== +* ===================================================== +* Begin test DGETSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO IMB = 1, NNB + MB = NBVAL( IMB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'T' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL DLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL DSCAL( NCOLS*NRHS, + $ ONE / DBLE( NCOLS ), + $ WORK, 1 ) + END IF + CALL DGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, ONE, + $ COPYA, LDA, WORK, LDWORK, + $ ZERO, B, LDB ) + CALL DLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL DLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL DLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'DGETSLS' + CALL DGETSLS( TRANS, M, N, NRHS, + $ A, LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'DGETSLS', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for DGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL DLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL DQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for DGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = DQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = DQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, + $ B, LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 ) TRANS, + $ M, N, NRHS, MB, NB, ITYPE, + $ K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO END IF +* ===================================================== +* End test DGETSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -662,37 +823,37 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = DQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = DQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 4 ) ) + $ WORK( M*NRHS+1 ), RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -716,38 +877,38 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 8 ) ) + $ WORK( M*NRHS+1 ), RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -776,45 +937,45 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 12 ) ) + $ WORK( M*NRHS+1 ), RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 90 K = 3, 14 + DO 90 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -826,6 +987,12 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, NRUN = NRUN + 12 * 100 CONTINUE + + + + + + 110 CONTINUE 120 CONTINUE 130 CONTINUE diff --git a/TESTING/LIN/derrls.f b/TESTING/LIN/derrls.f index a1f74dec23..09d745238e 100644 --- a/TESTING/LIN/derrls.f +++ b/TESTING/LIN/derrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> DERRLS tests the error exits for the DOUBLE PRECISION least squares -*> driver routines (DGELS, SGELSS, SGELSY, SGELSD). +*> driver routines (DGELS, DGELST, DGETSLS, SGELSS, SGELSY, SGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ SUBROUTINE DERRLS( PATH, NUNIT ) EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, DGELS, DGELSD, DGELSS, DGELSY + EXTERNAL ALAESM, CHKXER, DGELS, DGELSD, DGELSS, DGELST, + $ DGELSY, DGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ SUBROUTINE DERRLS( PATH, NUNIT ) INFOT = 8 CALL DGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'DGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'DGELS ', INFOT, NOUT, LERR, OK ) * +* DGELST +* + SRNAMT = 'DGELST' + INFOT = 1 + CALL DGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) +* +* DGETSLS +* + SRNAMT = 'DGETSLS' + INFOT = 1 + CALL DGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) +* * DGELSS * SRNAMT = 'DGELSS' diff --git a/TESTING/LIN/sdrvls.f b/TESTING/LIN/sdrvls.f index b964515037..2baf9a3fb1 100644 --- a/TESTING/LIN/sdrvls.f +++ b/TESTING/LIN/sdrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> SDRVLS tests the least squares driver routines SGELS, SGETSLS, SGELSS, SGELSY, -*> and SGELSD. +*> SDRVLS tests the least squares driver routines SGELS, SGELST, +*> SGETSLS, SGELSS, SGELSY and SGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) REAL ONE, TWO, ZERO @@ -225,8 +225,8 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, - $ LWORK_SGELS, LWORK_SGETSLS, LWORK_SGELSS, - $ LWORK_SGELSY, LWORK_SGELSD + $ LWORK_SGELS, LWORK_SGELST, LWORK_SGETSLS, + $ LWORK_SGELSS, LWORK_SGELSY, LWORK_SGELSD REAL EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. @@ -243,12 +243,12 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, SAXPY, SERRLS, SGELS, - $ SGELSD, SGELSS, SGELSY, SGEMM, SLACPY, - $ SLARNV, SQRT13, SQRT15, SQRT16, SSCAL, - $ XLAENV, SGETSLS + $ SGELSD, SGELSS, SGELST, SGELSY, SGEMM, + $ SGETSLS, SLACPY, SLARNV, SQRT13, SQRT15, + $ SQRT16, SSCAL, XLAENV * .. * .. Intrinsic Functions .. - INTRINSIC INT, LOG, MAX, MIN, REAL, SQRT + INTRINSIC INT, MAX, MIN, REAL, SQRT * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -330,7 +330,8 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -357,6 +358,10 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL SGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ( 1 ), -1, INFO ) LWORK_SGELS = INT ( WQ( 1 ) ) +* Compute workspace needed for SGELST + CALL SGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_SGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for SGETSLS CALL SGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ( 1 ), -1, INFO ) @@ -378,9 +383,9 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Compute LIWORK workspace needed for SGELSY and SGELSD LIWORK = MAX( LIWORK, N, IWQ( 1 ) ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_SGELS, LWORK_SGETSLS, - $ LWORK_SGELSY, LWORK_SGELSS, - $ LWORK_SGELSD ) + LWORK = MAX( LWORK, LWORK_SGELS, LWORK_SGELST, + $ LWORK_SGETSLS, LWORK_SGELSY, + $ LWORK_SGELSS, LWORK_SGELSD ) END IF ENDDO ENDDO @@ -411,21 +416,26 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 110 -* +* ===================================================== +* Begin test SGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test SGELS -* * Generate a matrix of scaling type ISCALE * CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -469,20 +479,27 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for SGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL SQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for SGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * RESULT( 2 ) = SQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, @@ -500,7 +517,7 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -509,26 +526,33 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test SGETSLS + END DO + END DO + END IF +* ===================================================== +* End test SGELS +* ===================================================== +* ===================================================== +* Begin test SGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) -* - DO 60 ITRAN = 1, 2 +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -563,31 +587,38 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'SGETSLS ' - CALL SGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'SGELST' + CALL SGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'SGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'SGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for SGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL SQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, - $ RESULT( 15 ) ) + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for SGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * - RESULT( 16 ) = SQRT17( TRANS, 1, M, N, + RESULT( 4 ) = SQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -595,7 +626,7 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * Solving overdetermined system * - RESULT( 16 ) = SQRT14( TRANS, M, N, + RESULT( 4 ) = SQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -603,21 +634,151 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 ) TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO END IF +* ===================================================== +* End test SGELST +* ===================================================== +* ===================================================== +* Begin test SGETSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO IMB = 1, NNB + MB = NBVAL( IMB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'T' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL SLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL SSCAL( NCOLS*NRHS, + $ ONE / REAL( NCOLS ), + $ WORK, 1 ) + END IF + CALL SGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, ONE, + $ COPYA, LDA, WORK, LDWORK, + $ ZERO, B, LDB ) + CALL SLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL SLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL SLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'SGETSLS' + CALL SGETSLS( TRANS, M, N, NRHS, + $ A, LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'SGETSLS', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for SGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL SLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL SQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for SGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = SQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = SQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, + $ B, LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 ) TRANS, + $ M, N, NRHS, MB, NB, ITYPE, + $ K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO + END IF +* ===================================================== +* End test SGETSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -662,37 +823,37 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = SQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = SQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 4 ) ) + $ WORK( M*NRHS+1 ), RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -716,38 +877,38 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 8 ) ) + $ WORK( M*NRHS+1 ), RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -776,45 +937,45 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 12 ) ) + $ WORK( M*NRHS+1 ), RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 90 K = 3, 14 + DO 90 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) diff --git a/TESTING/LIN/serrls.f b/TESTING/LIN/serrls.f index e6ee4360f9..6c4820066a 100644 --- a/TESTING/LIN/serrls.f +++ b/TESTING/LIN/serrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> SERRLS tests the error exits for the REAL least squares -*> driver routines (SGELS, SGELSS, SGELSY, SGELSD). +*> driver routines (SGELS, SGELST, SGETSLS, SGELSS, SGELSY, SGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ SUBROUTINE SERRLS( PATH, NUNIT ) EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, SGELS, SGELSD, SGELSS, SGELSY + EXTERNAL ALAESM, CHKXER, SGELS, SGELSD, SGELSS, SGELST, + $ SGELSY, SGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ SUBROUTINE SERRLS( PATH, NUNIT ) INFOT = 8 CALL SGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'SGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'SGELS ', INFOT, NOUT, LERR, OK ) * +* SGELST +* + SRNAMT = 'SGELST' + INFOT = 1 + CALL SGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) +* +* SGETSLS +* + SRNAMT = 'SGETSLS' + INFOT = 1 + CALL SGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) +* * SGELSS * SRNAMT = 'SGELSS' diff --git a/TESTING/LIN/zdrvls.f b/TESTING/LIN/zdrvls.f index 2eab979054..b21345d302 100644 --- a/TESTING/LIN/zdrvls.f +++ b/TESTING/LIN/zdrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> ZDRVLS tests the least squares driver routines ZGELS, ZGETSLS, ZGELSS, ZGELSY -*> and ZGELSD. +*> ZDRVLS tests the least squares driver routines ZGELS, ZGELST, +*> ZGETSLS, ZGELSS, ZGELSY and ZGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) DOUBLE PRECISION ONE, ZERO @@ -228,8 +228,8 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, LRWORK, - $ LWORK_ZGELS, LWORK_ZGETSLS, LWORK_ZGELSS, - $ LWORK_ZGELSY, LWORK_ZGELSD, + $ LWORK_ZGELS, LWORK_ZGELST, LWORK_ZGETSLS, + $ LWORK_ZGELSS, LWORK_ZGELSY, LWORK_ZGELSD, $ LRWORK_ZGELSY, LRWORK_ZGELSS, LRWORK_ZGELSD DOUBLE PRECISION EPS, NORMA, NORMB, RCOND * .. @@ -248,10 +248,10 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, EXTERNAL DASUM, DLAMCH, ZQRT12, ZQRT14, ZQRT17 * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, DLASRT, XLAENV, - $ ZDSCAL, ZERRLS, ZGELS, ZGELSD, ZGELSS, - $ ZGELSY, ZGEMM, ZLACPY, ZLARNV, ZQRT13, ZQRT15, - $ ZQRT16, ZGETSLS + EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, ZERRLS, ZGELS, + $ ZGELSD, ZGELSS, ZGELST, ZGELSY, ZGEMM, + $ ZGETSLS, ZLACPY, ZLARNV, ZQRT13, ZQRT15, + $ ZQRT16, ZDSCAL, XLAENV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, INT, SQRT @@ -334,7 +334,8 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -361,6 +362,10 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL ZGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_ZGELS = INT ( WQ( 1 ) ) +* Compute workspace needed for ZGELST + CALL ZGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_ZGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for ZGETSLS CALL ZGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -390,9 +395,9 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LRWORK = MAX( LRWORK, LRWORK_ZGELSY, $ LRWORK_ZGELSS, LRWORK_ZGELSD ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_ZGELS, LWORK_ZGETSLS, - $ LWORK_ZGELSY, LWORK_ZGELSS, - $ LWORK_ZGELSD ) + LWORK = MAX( LWORK, LWORK_ZGELS, LWORK_ZGELST, + $ LWORK_ZGETSLS, LWORK_ZGELSY, + $ LWORK_ZGELSS, LWORK_ZGELSD ) END IF ENDDO ENDDO @@ -425,21 +430,26 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 100 -* +* ===================================================== +* Begin test ZGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test ZGELS -* * Generate a matrix of scaling type ISCALE * CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -484,15 +494,20 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for ZGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL ZQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, RWORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for ZGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN @@ -515,7 +530,7 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -524,26 +539,34 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test ZGETSLS + END DO + END DO + END IF +* ===================================================== +* End test ZGELS +* ===================================================== +* ===================================================== +* Begin test ZGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) + CALL XLAENV( 3, NXVAL( INB ) ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -560,9 +583,9 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, IF( NCOLS.GT.0 ) THEN CALL ZLARNV( 2, ISEED, NCOLS*NRHS, $ WORK ) - CALL ZSCAL( NCOLS*NRHS, - $ CONE / DBLE( NCOLS ), WORK, - $ 1 ) + CALL ZDSCAL( NCOLS*NRHS, + $ ONE / DBLE( NCOLS ), WORK, + $ 1 ) END IF CALL ZGEMM( TRANS, 'No transpose', NROWS, $ NRHS, NCOLS, CONE, COPYA, LDA, @@ -578,31 +601,37 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'ZGETSLS ' - CALL ZGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'ZGELST' + CALL ZGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) +* IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'ZGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'ZGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for ZGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL ZQRT16( TRANS, M, N, NRHS, COPYA, - $ LDA, B, LDB, C, LDB, WORK2, - $ RESULT( 15 ) ) + $ LDA, B, LDB, C, LDB, RWORK, + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for ZGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * * Solving LS system * - RESULT( 16 ) = ZQRT17( TRANS, 1, M, N, + RESULT( 4 ) = ZQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -610,7 +639,7 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * Solving overdetermined system * - RESULT( 16 ) = ZQRT14( TRANS, M, N, + RESULT( 4 ) = ZQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -618,21 +647,151 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 )TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO + END IF +* ===================================================== +* End test ZGELST +* ===================================================== +* ===================================================== +* Begin test ZGELSTSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO INB = 1, NNB + MB = NBVAL( INB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO IMB = 1, NNB + NB = NBVAL( IMB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'C' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL ZLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL ZSCAL( NCOLS*NRHS, + $ CONE / DBLE( NCOLS ), + $ WORK, 1 ) + END IF + CALL ZGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, CONE, + $ COPYA, LDA, WORK, LDWORK, + $ CZERO, B, LDB ) + CALL ZLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL ZLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL ZLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'ZGETSLS ' + CALL ZGETSLS( TRANS, M, N, NRHS, A, + $ LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'ZGETSLS ', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for ZGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL ZLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL ZQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK2, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for ZGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = ZQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = ZQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, B, + $ LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 )TRANS, + $ M, N, NRHS, MB, NB, ITYPE, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO END IF +* ===================================================== +* End test ZGELSTSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -680,37 +839,37 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * workspace used: 2*MNMIN+NB*NB+NB*MAX(N,NRHS) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = ZQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = ZQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK, RWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 4 ) ) + $ RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -736,38 +895,38 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 8 ) ) + $ RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -792,45 +951,45 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 12 ) ) + $ RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 80 K = 3, 14 + DO 80 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) From a7e07245306098c1d0fd70f49ae7ef78bd6a5538 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 17:30:26 +0100 Subject: [PATCH 36/90] Used trapezoidal NaN check in LAPACKE_?larfb --- LAPACKE/src/lapacke_clarfb.c | 66 ++++++++++-------------------------- LAPACKE/src/lapacke_dlarfb.c | 66 ++++++++++-------------------------- LAPACKE/src/lapacke_slarfb.c | 66 ++++++++++-------------------------- LAPACKE/src/lapacke_zlarfb.c | 66 ++++++++++-------------------------- 4 files changed, 72 insertions(+), 192 deletions(-) diff --git a/LAPACKE/src/lapacke_clarfb.c b/LAPACKE/src/lapacke_clarfb.c index ccd34cecdf..8acbd9beb3 100644 --- a/LAPACKE/src/lapacke_clarfb.c +++ b/LAPACKE/src/lapacke_clarfb.c @@ -43,6 +43,8 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct lapack_int ldwork; lapack_complex_float* work = NULL; lapack_int ncols_v, nrows_v; + lapack_logical left, col, forward; + char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_clarfb", -1 ); return -1; @@ -50,59 +52,27 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - lapack_int lrv, lcv; /* row, column stride */ - if( matrix_layout == LAPACK_COL_MAJOR ) { - lrv = 1; - lcv = ldv; - } else { - lrv = ldv; - lcv = 1; - } - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); + return -8; + } + if( LAPACKE_ctz_nancheck( matrix_layout, direct, uplo, 'u', + ncols_v, nrows_v, v, ldv ) ) { + return -9; } if( LAPACKE_cge_nancheck( matrix_layout, k, k, t, ldt ) ) { return -11; } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ctr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v-k, ncols_v, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); - return -8; - } - if( LAPACKE_ctr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*lrv], ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ctr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v-k, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); - return -8; - } - if( LAPACKE_ctr_nancheck( matrix_layout, 'l', 'u', k, - &v[(ncols_v-k)*lcv], ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } } #endif diff --git a/LAPACKE/src/lapacke_dlarfb.c b/LAPACKE/src/lapacke_dlarfb.c index 3c3c24c54b..1fbb8639c6 100644 --- a/LAPACKE/src/lapacke_dlarfb.c +++ b/LAPACKE/src/lapacke_dlarfb.c @@ -42,6 +42,8 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct lapack_int ldwork; double* work = NULL; lapack_int ncols_v, nrows_v; + lapack_logical left, col, forward; + char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dlarfb", -1 ); return -1; @@ -49,59 +51,27 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - lapack_int lrv, lcv; /* row, column stride */ - if( matrix_layout == LAPACK_COL_MAJOR ) { - lrv = 1; - lcv = ldv; - } else { - lrv = ldv; - lcv = 1; - } - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); + return -8; + } + if( LAPACKE_dtz_nancheck( matrix_layout, direct, uplo, 'u', + ncols_v, nrows_v, v, ldv ) ) { + return -9; } if( LAPACKE_dge_nancheck( matrix_layout, k, k, t, ldt ) ) { return -11; } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_dtr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v-k, ncols_v, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); - return -8; - } - if( LAPACKE_dtr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*lrv], ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_dtr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v-k, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); - return -8; - } - if( LAPACKE_dtr_nancheck( matrix_layout, 'l', 'u', k, - &v[(ncols_v-k)*lcv], ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } } #endif diff --git a/LAPACKE/src/lapacke_slarfb.c b/LAPACKE/src/lapacke_slarfb.c index 37d51dee58..2aa95e044e 100644 --- a/LAPACKE/src/lapacke_slarfb.c +++ b/LAPACKE/src/lapacke_slarfb.c @@ -42,6 +42,8 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct lapack_int ldwork; float* work = NULL; lapack_int ncols_v, nrows_v; + lapack_logical left, col, forward; + char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_slarfb", -1 ); return -1; @@ -49,59 +51,27 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - lapack_int lrv, lcv; /* row, column stride */ - if( matrix_layout == LAPACK_COL_MAJOR ) { - lrv = 1; - lcv = ldv; - } else { - lrv = ldv; - lcv = 1; - } - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); + return -8; + } + if( LAPACKE_stz_nancheck( matrix_layout, direct, uplo, 'u', + ncols_v, nrows_v, v, ldv ) ) { + return -9; } if( LAPACKE_sge_nancheck( matrix_layout, k, k, t, ldt ) ) { return -11; } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_str_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v-k, ncols_v, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); - return -8; - } - if( LAPACKE_str_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*lrv], ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_str_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v-k, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); - return -8; - } - if( LAPACKE_str_nancheck( matrix_layout, 'l', 'u', k, - &v[(ncols_v-k)*lcv], ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } } #endif diff --git a/LAPACKE/src/lapacke_zlarfb.c b/LAPACKE/src/lapacke_zlarfb.c index 7cd23dde8f..7d5c8354a8 100644 --- a/LAPACKE/src/lapacke_zlarfb.c +++ b/LAPACKE/src/lapacke_zlarfb.c @@ -43,6 +43,8 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct lapack_int ldwork; lapack_complex_double* work = NULL; lapack_int ncols_v, nrows_v; + lapack_logical left, col, forward; + char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zlarfb", -1 ); return -1; @@ -50,59 +52,27 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - lapack_int lrv, lcv; /* row, column stride */ - if( matrix_layout == LAPACK_COL_MAJOR ) { - lrv = 1; - lcv = ldv; - } else { - lrv = ldv; - lcv = 1; - } - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); + return -8; + } + if( LAPACKE_ztz_nancheck( matrix_layout, direct, uplo, 'u', + ncols_v, nrows_v, v, ldv ) ) { + return -9; } if( LAPACKE_zge_nancheck( matrix_layout, k, k, t, ldt ) ) { return -11; } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ztr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v-k, ncols_v, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); - return -8; - } - if( LAPACKE_ztr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*lrv], ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ztr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v-k, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); - return -8; - } - if( LAPACKE_ztr_nancheck( matrix_layout, 'l', 'u', k, - &v[(ncols_v-k)*lcv], ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } } #endif From 2da7000a7f370beef0c02a18a5af43fb0fdede16 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 20:12:09 +0100 Subject: [PATCH 37/90] Fixed bug in trapezoidal NaN check. --- LAPACKE/utils/lapacke_ctz_nancheck.c | 2 +- LAPACKE/utils/lapacke_dtz_nancheck.c | 4 ++-- LAPACKE/utils/lapacke_stz_nancheck.c | 2 +- LAPACKE/utils/lapacke_ztz_nancheck.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/LAPACKE/utils/lapacke_ctz_nancheck.c b/LAPACKE/utils/lapacke_ctz_nancheck.c index 7e912715c7..5e0677191e 100644 --- a/LAPACKE/utils/lapacke_ctz_nancheck.c +++ b/LAPACKE/utils/lapacke_ctz_nancheck.c @@ -95,7 +95,7 @@ lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || - ( !front && !LAPACKE_lsame( uplo, 'b' ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ diff --git a/LAPACKE/utils/lapacke_dtz_nancheck.c b/LAPACKE/utils/lapacke_dtz_nancheck.c index 46e9099da0..e1bc158067 100644 --- a/LAPACKE/utils/lapacke_dtz_nancheck.c +++ b/LAPACKE/utils/lapacke_dtz_nancheck.c @@ -94,7 +94,7 @@ lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || - ( !front && !LAPACKE_lsame( uplo, 'b' ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ @@ -132,7 +132,7 @@ lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, /* Check rectangular part */ if( rect_offset >= 0 ) { if( LAPACKE_dge_nancheck( matrix_layout, rect_m, rect_n, - &a[rect_offset], lda) ) { + &a[rect_offset], lda ) ) { return (lapack_logical) 1; } } diff --git a/LAPACKE/utils/lapacke_stz_nancheck.c b/LAPACKE/utils/lapacke_stz_nancheck.c index df9768586e..38cca2893d 100644 --- a/LAPACKE/utils/lapacke_stz_nancheck.c +++ b/LAPACKE/utils/lapacke_stz_nancheck.c @@ -94,7 +94,7 @@ lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || - ( !front && !LAPACKE_lsame( uplo, 'b' ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ diff --git a/LAPACKE/utils/lapacke_ztz_nancheck.c b/LAPACKE/utils/lapacke_ztz_nancheck.c index 9869e7aa6f..e4d24fadb2 100644 --- a/LAPACKE/utils/lapacke_ztz_nancheck.c +++ b/LAPACKE/utils/lapacke_ztz_nancheck.c @@ -95,7 +95,7 @@ lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || - ( !front && !LAPACKE_lsame( uplo, 'b' ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ From c43d27a8891ae96afbae5f2b59560a78f7f26cda Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 20:14:32 +0100 Subject: [PATCH 38/90] Paranthesis formatting. --- LAPACKE/utils/lapacke_ctz_nancheck.c | 8 ++++---- LAPACKE/utils/lapacke_dtz_nancheck.c | 8 ++++---- LAPACKE/utils/lapacke_stz_nancheck.c | 8 ++++---- LAPACKE/utils/lapacke_ztz_nancheck.c | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/LAPACKE/utils/lapacke_ctz_nancheck.c b/LAPACKE/utils/lapacke_ctz_nancheck.c index 5e0677191e..564857f4e6 100644 --- a/LAPACKE/utils/lapacke_ctz_nancheck.c +++ b/LAPACKE/utils/lapacke_ctz_nancheck.c @@ -112,18 +112,18 @@ lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, /* Fix offsets depending on the shape of the matrix */ if( front ) { if( lower && m > n) { - rect_offset = tri_n * (!colmaj ? lda : 1); + rect_offset = tri_n * ( !colmaj ? lda : 1 ); } else if( !lower && n > m) { - rect_offset = tri_n * (colmaj ? lda : 1); + rect_offset = tri_n * ( colmaj ? lda : 1 ); } } else { if( m > n) { - tri_offset = rect_m * (!colmaj ? lda : 1); + tri_offset = rect_m * ( !colmaj ? lda : 1 ); if( !lower ) { rect_offset = 0; } } else if( n > m) { - tri_offset = rect_n * (colmaj ? lda : 1); + tri_offset = rect_n * ( colmaj ? lda : 1 ); if( lower ) { rect_offset = 0; } diff --git a/LAPACKE/utils/lapacke_dtz_nancheck.c b/LAPACKE/utils/lapacke_dtz_nancheck.c index e1bc158067..189a5778dd 100644 --- a/LAPACKE/utils/lapacke_dtz_nancheck.c +++ b/LAPACKE/utils/lapacke_dtz_nancheck.c @@ -111,18 +111,18 @@ lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, /* Fix offsets depending on the shape of the matrix */ if( front ) { if( lower && m > n) { - rect_offset = tri_n * (!colmaj ? lda : 1); + rect_offset = tri_n * ( !colmaj ? lda : 1 ); } else if( !lower && n > m) { - rect_offset = tri_n * (colmaj ? lda : 1); + rect_offset = tri_n * ( colmaj ? lda : 1 ); } } else { if( m > n) { - tri_offset = rect_m * (!colmaj ? lda : 1); + tri_offset = rect_m * ( !colmaj ? lda : 1 ); if( !lower ) { rect_offset = 0; } } else if( n > m) { - tri_offset = rect_n * (colmaj ? lda : 1); + tri_offset = rect_n * ( colmaj ? lda : 1 ); if( lower ) { rect_offset = 0; } diff --git a/LAPACKE/utils/lapacke_stz_nancheck.c b/LAPACKE/utils/lapacke_stz_nancheck.c index 38cca2893d..7bdef4e2cc 100644 --- a/LAPACKE/utils/lapacke_stz_nancheck.c +++ b/LAPACKE/utils/lapacke_stz_nancheck.c @@ -111,18 +111,18 @@ lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, /* Fix offsets depending on the shape of the matrix */ if( front ) { if( lower && m > n) { - rect_offset = tri_n * (!colmaj ? lda : 1); + rect_offset = tri_n * ( !colmaj ? lda : 1 ); } else if( !lower && n > m) { - rect_offset = tri_n * (colmaj ? lda : 1); + rect_offset = tri_n * ( colmaj ? lda : 1 ); } } else { if( m > n) { - tri_offset = rect_m * (!colmaj ? lda : 1); + tri_offset = rect_m * ( !colmaj ? lda : 1 ); if( !lower ) { rect_offset = 0; } } else if( n > m) { - tri_offset = rect_n * (colmaj ? lda : 1); + tri_offset = rect_n * ( colmaj ? lda : 1 ); if( lower ) { rect_offset = 0; } diff --git a/LAPACKE/utils/lapacke_ztz_nancheck.c b/LAPACKE/utils/lapacke_ztz_nancheck.c index e4d24fadb2..bcd18cb586 100644 --- a/LAPACKE/utils/lapacke_ztz_nancheck.c +++ b/LAPACKE/utils/lapacke_ztz_nancheck.c @@ -112,18 +112,18 @@ lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, /* Fix offsets depending on the shape of the matrix */ if( front ) { if( lower && m > n) { - rect_offset = tri_n * (!colmaj ? lda : 1); + rect_offset = tri_n * ( !colmaj ? lda : 1 ); } else if( !lower && n > m) { - rect_offset = tri_n * (colmaj ? lda : 1); + rect_offset = tri_n * ( colmaj ? lda : 1 ); } } else { if( m > n) { - tri_offset = rect_m * (!colmaj ? lda : 1); + tri_offset = rect_m * ( !colmaj ? lda : 1 ); if( !lower ) { rect_offset = 0; } } else if( n > m) { - tri_offset = rect_n * (colmaj ? lda : 1); + tri_offset = rect_n * ( colmaj ? lda : 1 ); if( lower ) { rect_offset = 0; } From 70c4244912d50dcbb1fa10686514632910da734a Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 21:02:31 +0100 Subject: [PATCH 39/90] Paranthesis formatting. --- LAPACKE/utils/lapacke_ctz_nancheck.c | 12 ++++++------ LAPACKE/utils/lapacke_dtz_nancheck.c | 12 ++++++------ LAPACKE/utils/lapacke_stz_nancheck.c | 12 ++++++------ LAPACKE/utils/lapacke_ztz_nancheck.c | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/LAPACKE/utils/lapacke_ctz_nancheck.c b/LAPACKE/utils/lapacke_ctz_nancheck.c index 564857f4e6..bea9567811 100644 --- a/LAPACKE/utils/lapacke_ctz_nancheck.c +++ b/LAPACKE/utils/lapacke_ctz_nancheck.c @@ -106,23 +106,23 @@ lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, lapack_int tri_offset = 0; lapack_int tri_n = MIN(m,n); lapack_int rect_offset = -1; - lapack_int rect_m = (m > n) ? m - n : m; - lapack_int rect_n = (n > m) ? n - m : n; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; /* Fix offsets depending on the shape of the matrix */ if( front ) { - if( lower && m > n) { + if( lower && m > n ) { rect_offset = tri_n * ( !colmaj ? lda : 1 ); - } else if( !lower && n > m) { + } else if( !lower && n > m ) { rect_offset = tri_n * ( colmaj ? lda : 1 ); } } else { - if( m > n) { + if( m > n ) { tri_offset = rect_m * ( !colmaj ? lda : 1 ); if( !lower ) { rect_offset = 0; } - } else if( n > m) { + } else if( n > m ) { tri_offset = rect_n * ( colmaj ? lda : 1 ); if( lower ) { rect_offset = 0; diff --git a/LAPACKE/utils/lapacke_dtz_nancheck.c b/LAPACKE/utils/lapacke_dtz_nancheck.c index 189a5778dd..cd2ae6731a 100644 --- a/LAPACKE/utils/lapacke_dtz_nancheck.c +++ b/LAPACKE/utils/lapacke_dtz_nancheck.c @@ -105,23 +105,23 @@ lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, lapack_int tri_offset = 0; lapack_int tri_n = MIN(m,n); lapack_int rect_offset = -1; - lapack_int rect_m = (m > n) ? m - n : m; - lapack_int rect_n = (n > m) ? n - m : n; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; /* Fix offsets depending on the shape of the matrix */ if( front ) { - if( lower && m > n) { + if( lower && m > n ) { rect_offset = tri_n * ( !colmaj ? lda : 1 ); - } else if( !lower && n > m) { + } else if( !lower && n > m ) { rect_offset = tri_n * ( colmaj ? lda : 1 ); } } else { - if( m > n) { + if( m > n ) { tri_offset = rect_m * ( !colmaj ? lda : 1 ); if( !lower ) { rect_offset = 0; } - } else if( n > m) { + } else if( n > m ) { tri_offset = rect_n * ( colmaj ? lda : 1 ); if( lower ) { rect_offset = 0; diff --git a/LAPACKE/utils/lapacke_stz_nancheck.c b/LAPACKE/utils/lapacke_stz_nancheck.c index 7bdef4e2cc..7d7c30f96c 100644 --- a/LAPACKE/utils/lapacke_stz_nancheck.c +++ b/LAPACKE/utils/lapacke_stz_nancheck.c @@ -105,23 +105,23 @@ lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, lapack_int tri_offset = 0; lapack_int tri_n = MIN(m,n); lapack_int rect_offset = -1; - lapack_int rect_m = (m > n) ? m - n : m; - lapack_int rect_n = (n > m) ? n - m : n; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; /* Fix offsets depending on the shape of the matrix */ if( front ) { - if( lower && m > n) { + if( lower && m > n ) { rect_offset = tri_n * ( !colmaj ? lda : 1 ); - } else if( !lower && n > m) { + } else if( !lower && n > m ) { rect_offset = tri_n * ( colmaj ? lda : 1 ); } } else { - if( m > n) { + if( m > n ) { tri_offset = rect_m * ( !colmaj ? lda : 1 ); if( !lower ) { rect_offset = 0; } - } else if( n > m) { + } else if( n > m ) { tri_offset = rect_n * ( colmaj ? lda : 1 ); if( lower ) { rect_offset = 0; diff --git a/LAPACKE/utils/lapacke_ztz_nancheck.c b/LAPACKE/utils/lapacke_ztz_nancheck.c index bcd18cb586..481fa4c033 100644 --- a/LAPACKE/utils/lapacke_ztz_nancheck.c +++ b/LAPACKE/utils/lapacke_ztz_nancheck.c @@ -106,23 +106,23 @@ lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, lapack_int tri_offset = 0; lapack_int tri_n = MIN(m,n); lapack_int rect_offset = -1; - lapack_int rect_m = (m > n) ? m - n : m; - lapack_int rect_n = (n > m) ? n - m : n; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; /* Fix offsets depending on the shape of the matrix */ if( front ) { - if( lower && m > n) { + if( lower && m > n ) { rect_offset = tri_n * ( !colmaj ? lda : 1 ); - } else if( !lower && n > m) { + } else if( !lower && n > m ) { rect_offset = tri_n * ( colmaj ? lda : 1 ); } } else { - if( m > n) { + if( m > n ) { tri_offset = rect_m * ( !colmaj ? lda : 1 ); if( !lower ) { rect_offset = 0; } - } else if( n > m) { + } else if( n > m ) { tri_offset = rect_n * ( colmaj ? lda : 1 ); if( lower ) { rect_offset = 0; From 6b5c5bba7f944ec19957522c1c702948a44f4876 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 21:09:03 +0100 Subject: [PATCH 40/90] Implemented transpose function for trapezoidal matrices. --- LAPACKE/utils/lapacke_ctz_trans.c | 153 ++++++++++++++++++++++++++++++ LAPACKE/utils/lapacke_dtz_trans.c | 153 ++++++++++++++++++++++++++++++ LAPACKE/utils/lapacke_stz_trans.c | 153 ++++++++++++++++++++++++++++++ LAPACKE/utils/lapacke_ztz_trans.c | 153 ++++++++++++++++++++++++++++++ 4 files changed, 612 insertions(+) create mode 100644 LAPACKE/utils/lapacke_ctz_trans.c create mode 100644 LAPACKE/utils/lapacke_dtz_trans.c create mode 100644 LAPACKE/utils/lapacke_stz_trans.c create mode 100644 LAPACKE/utils/lapacke_ztz_trans.c diff --git a/LAPACKE/utils/lapacke_ctz_trans.c b/LAPACKE/utils/lapacke_ctz_trans.c new file mode 100644 index 0000000000..8910aee7da --- /dev/null +++ b/LAPACKE/utils/lapacke_ctz_trans.c @@ -0,0 +1,153 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Converts input triangular matrix from row-major(C) to column-major(Fortran) + layout or vice versa. The shape of the trapezoidal matrix is determined by + the arguments `direct` and `uplo`. `Direct` chooses the diagonal which shall + be considered and `uplo` tells us whether we use the upper or lower part of + the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +void LAPACKE_ctz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float *in, lapack_int ldin, + lapack_complex_float *out, lapack_int ldout ) +{ + lapack_logical colmaj, front, lower, unit; + + if( in == NULL || out == NULL ) return ; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_in_offset = 0; + lapack_int tri_out_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_in_offset = -1; + lapack_int rect_out_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_in_offset = tri_n * ( !colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( colmaj ? ldout : 1 ); + } else if( !lower && n > m ) { + rect_in_offset = tri_n * ( colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( !colmaj ? ldout : 1 ); + } + } else { + if( m > n ) { + tri_in_offset = rect_m * ( !colmaj ? ldin : 1 ); + tri_out_offset = rect_m * ( colmaj ? ldout : 1 ); + if( !lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } else if( n > m ) { + tri_in_offset = rect_n * ( colmaj ? ldin : 1 ); + tri_out_offset = rect_n * ( !colmaj ? ldout : 1 ); + if( lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } + } + + /* Copy & transpose rectangular part */ + if( rect_in_offset >= 0 && rect_out_offset >= 0 ) { + LAPACKE_cge_trans( matrix_layout, rect_m, rect_n, + &in[rect_in_offset], ldin, + &out[rect_out_offset], ldout ); + } + + /* Copy & transpose triangular part */ + return LAPACKE_ctr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); +} diff --git a/LAPACKE/utils/lapacke_dtz_trans.c b/LAPACKE/utils/lapacke_dtz_trans.c new file mode 100644 index 0000000000..80d94ead90 --- /dev/null +++ b/LAPACKE/utils/lapacke_dtz_trans.c @@ -0,0 +1,153 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Converts input triangular matrix from row-major(C) to column-major(Fortran) + layout or vice versa. The shape of the trapezoidal matrix is determined by + the arguments `direct` and `uplo`. `Direct` chooses the diagonal which shall + be considered and `uplo` tells us whether we use the upper or lower part of + the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +void LAPACKE_dtz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const double *in, lapack_int ldin, + double *out, lapack_int ldout ) +{ + lapack_logical colmaj, front, lower, unit; + + if( in == NULL || out == NULL ) return ; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_in_offset = 0; + lapack_int tri_out_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_in_offset = -1; + lapack_int rect_out_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_in_offset = tri_n * ( !colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( colmaj ? ldout : 1 ); + } else if( !lower && n > m ) { + rect_in_offset = tri_n * ( colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( !colmaj ? ldout : 1 ); + } + } else { + if( m > n ) { + tri_in_offset = rect_m * ( !colmaj ? ldin : 1 ); + tri_out_offset = rect_m * ( colmaj ? ldout : 1 ); + if( !lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } else if( n > m ) { + tri_in_offset = rect_n * ( colmaj ? ldin : 1 ); + tri_out_offset = rect_n * ( !colmaj ? ldout : 1 ); + if( lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } + } + + /* Copy & transpose rectangular part */ + if( rect_in_offset >= 0 && rect_out_offset >= 0 ) { + LAPACKE_dge_trans( matrix_layout, rect_m, rect_n, + &in[rect_in_offset], ldin, + &out[rect_out_offset], ldout ); + } + + /* Copy & transpose triangular part */ + return LAPACKE_dtr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); +} diff --git a/LAPACKE/utils/lapacke_stz_trans.c b/LAPACKE/utils/lapacke_stz_trans.c new file mode 100644 index 0000000000..793f3833d4 --- /dev/null +++ b/LAPACKE/utils/lapacke_stz_trans.c @@ -0,0 +1,153 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Converts input triangular matrix from row-major(C) to column-major(Fortran) + layout or vice versa. The shape of the trapezoidal matrix is determined by + the arguments `direct` and `uplo`. `Direct` chooses the diagonal which shall + be considered and `uplo` tells us whether we use the upper or lower part of + the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +void LAPACKE_stz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const float *in, lapack_int ldin, + float *out, lapack_int ldout ) +{ + lapack_logical colmaj, front, lower, unit; + + if( in == NULL || out == NULL ) return ; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_in_offset = 0; + lapack_int tri_out_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_in_offset = -1; + lapack_int rect_out_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_in_offset = tri_n * ( !colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( colmaj ? ldout : 1 ); + } else if( !lower && n > m ) { + rect_in_offset = tri_n * ( colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( !colmaj ? ldout : 1 ); + } + } else { + if( m > n ) { + tri_in_offset = rect_m * ( !colmaj ? ldin : 1 ); + tri_out_offset = rect_m * ( colmaj ? ldout : 1 ); + if( !lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } else if( n > m ) { + tri_in_offset = rect_n * ( colmaj ? ldin : 1 ); + tri_out_offset = rect_n * ( !colmaj ? ldout : 1 ); + if( lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } + } + + /* Copy & transpose rectangular part */ + if( rect_in_offset >= 0 && rect_out_offset >= 0 ) { + LAPACKE_sge_trans( matrix_layout, rect_m, rect_n, + &in[rect_in_offset], ldin, + &out[rect_out_offset], ldout ); + } + + /* Copy & transpose triangular part */ + return LAPACKE_str_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); +} diff --git a/LAPACKE/utils/lapacke_ztz_trans.c b/LAPACKE/utils/lapacke_ztz_trans.c new file mode 100644 index 0000000000..881052331e --- /dev/null +++ b/LAPACKE/utils/lapacke_ztz_trans.c @@ -0,0 +1,153 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Converts input triangular matrix from row-major(C) to column-major(Fortran) + layout or vice versa. The shape of the trapezoidal matrix is determined by + the arguments `direct` and `uplo`. `Direct` chooses the diagonal which shall + be considered and `uplo` tells us whether we use the upper or lower part of + the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +void LAPACKE_ztz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double *in, lapack_int ldin, + lapack_complex_double *out, lapack_int ldout ) +{ + lapack_logical colmaj, front, lower, unit; + + if( in == NULL || out == NULL ) return ; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_in_offset = 0; + lapack_int tri_out_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_in_offset = -1; + lapack_int rect_out_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_in_offset = tri_n * ( !colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( colmaj ? ldout : 1 ); + } else if( !lower && n > m ) { + rect_in_offset = tri_n * ( colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( !colmaj ? ldout : 1 ); + } + } else { + if( m > n ) { + tri_in_offset = rect_m * ( !colmaj ? ldin : 1 ); + tri_out_offset = rect_m * ( colmaj ? ldout : 1 ); + if( !lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } else if( n > m ) { + tri_in_offset = rect_n * ( colmaj ? ldin : 1 ); + tri_out_offset = rect_n * ( !colmaj ? ldout : 1 ); + if( lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } + } + + /* Copy & transpose rectangular part */ + if( rect_in_offset >= 0 && rect_out_offset >= 0 ) { + LAPACKE_zge_trans( matrix_layout, rect_m, rect_n, + &in[rect_in_offset], ldin, + &out[rect_out_offset], ldout ); + } + + /* Copy & transpose triangular part */ + return LAPACKE_ztr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); +} From c91f13e2f7ab6db18ffb96613c7fbdb64c4244ce Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 21:32:46 +0100 Subject: [PATCH 41/90] Fixed rows and cols order in ?larfb --- LAPACKE/src/lapacke_clarfb.c | 6 +++--- LAPACKE/src/lapacke_dlarfb.c | 6 +++--- LAPACKE/src/lapacke_slarfb.c | 6 +++--- LAPACKE/src/lapacke_zlarfb.c | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/LAPACKE/src/lapacke_clarfb.c b/LAPACKE/src/lapacke_clarfb.c index 8acbd9beb3..8b1492becf 100644 --- a/LAPACKE/src/lapacke_clarfb.c +++ b/LAPACKE/src/lapacke_clarfb.c @@ -42,7 +42,7 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct lapack_int info = 0; lapack_int ldwork; lapack_complex_float* work = NULL; - lapack_int ncols_v, nrows_v; + lapack_int nrows_v, ncols_v; lapack_logical left, col, forward; char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -56,8 +56,8 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct col = LAPACKE_lsame( storev, 'c' ); forward = LAPACKE_lsame( direct, 'f' ); - ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { @@ -65,7 +65,7 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct return -8; } if( LAPACKE_ctz_nancheck( matrix_layout, direct, uplo, 'u', - ncols_v, nrows_v, v, ldv ) ) { + nrows_v, ncols_v, v, ldv ) ) { return -9; } if( LAPACKE_cge_nancheck( matrix_layout, k, k, t, ldt ) ) { diff --git a/LAPACKE/src/lapacke_dlarfb.c b/LAPACKE/src/lapacke_dlarfb.c index 1fbb8639c6..82e8fae527 100644 --- a/LAPACKE/src/lapacke_dlarfb.c +++ b/LAPACKE/src/lapacke_dlarfb.c @@ -41,7 +41,7 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct lapack_int info = 0; lapack_int ldwork; double* work = NULL; - lapack_int ncols_v, nrows_v; + lapack_int nrows_v, ncols_v; lapack_logical left, col, forward; char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -55,8 +55,8 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct col = LAPACKE_lsame( storev, 'c' ); forward = LAPACKE_lsame( direct, 'f' ); - ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { @@ -64,7 +64,7 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct return -8; } if( LAPACKE_dtz_nancheck( matrix_layout, direct, uplo, 'u', - ncols_v, nrows_v, v, ldv ) ) { + nrows_v, ncols_v, v, ldv ) ) { return -9; } if( LAPACKE_dge_nancheck( matrix_layout, k, k, t, ldt ) ) { diff --git a/LAPACKE/src/lapacke_slarfb.c b/LAPACKE/src/lapacke_slarfb.c index 2aa95e044e..892648f4b7 100644 --- a/LAPACKE/src/lapacke_slarfb.c +++ b/LAPACKE/src/lapacke_slarfb.c @@ -41,7 +41,7 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct lapack_int info = 0; lapack_int ldwork; float* work = NULL; - lapack_int ncols_v, nrows_v; + lapack_int nrows_v, ncols_v; lapack_logical left, col, forward; char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -55,8 +55,8 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct col = LAPACKE_lsame( storev, 'c' ); forward = LAPACKE_lsame( direct, 'f' ); - ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { @@ -64,7 +64,7 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct return -8; } if( LAPACKE_stz_nancheck( matrix_layout, direct, uplo, 'u', - ncols_v, nrows_v, v, ldv ) ) { + nrows_v, ncols_v, v, ldv ) ) { return -9; } if( LAPACKE_sge_nancheck( matrix_layout, k, k, t, ldt ) ) { diff --git a/LAPACKE/src/lapacke_zlarfb.c b/LAPACKE/src/lapacke_zlarfb.c index 7d5c8354a8..25cedb5063 100644 --- a/LAPACKE/src/lapacke_zlarfb.c +++ b/LAPACKE/src/lapacke_zlarfb.c @@ -42,7 +42,7 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct lapack_int info = 0; lapack_int ldwork; lapack_complex_double* work = NULL; - lapack_int ncols_v, nrows_v; + lapack_int nrows_v, ncols_v; lapack_logical left, col, forward; char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -56,8 +56,8 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct col = LAPACKE_lsame( storev, 'c' ); forward = LAPACKE_lsame( direct, 'f' ); - ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { @@ -65,7 +65,7 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct return -8; } if( LAPACKE_ztz_nancheck( matrix_layout, direct, uplo, 'u', - ncols_v, nrows_v, v, ldv ) ) { + nrows_v, ncols_v, v, ldv ) ) { return -9; } if( LAPACKE_zge_nancheck( matrix_layout, k, k, t, ldt ) ) { From 87cb1d113c0bb493b0f80755a86bd59ad8928783 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 23:37:31 +0100 Subject: [PATCH 42/90] Added trapezoidal transpositions to LAPACKE_?larfb_work. --- LAPACKE/src/lapacke_clarfb_work.c | 57 +++++++++---------------------- LAPACKE/src/lapacke_dlarfb_work.c | 57 +++++++++---------------------- LAPACKE/src/lapacke_slarfb_work.c | 57 +++++++++---------------------- LAPACKE/src/lapacke_zlarfb_work.c | 57 +++++++++---------------------- 4 files changed, 68 insertions(+), 160 deletions(-) diff --git a/LAPACKE/src/lapacke_clarfb_work.c b/LAPACKE/src/lapacke_clarfb_work.c index 3ad97c22d0..90ff0851f0 100644 --- a/LAPACKE/src/lapacke_clarfb_work.c +++ b/LAPACKE/src/lapacke_clarfb_work.c @@ -42,6 +42,8 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, { lapack_int info = 0; lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; lapack_int ldc_t, ldt_t, ldv_t; lapack_complex_float *v_t = NULL, *t_t = NULL, *c_t = NULL; if( matrix_layout == LAPACK_COL_MAJOR ) { @@ -52,16 +54,14 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); + + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + ldc_t = MAX(1,m); ldt_t = MAX(1,k); ldv_t = MAX(1,nrows_v); @@ -81,6 +81,11 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); return info; } + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + info = -8; + LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); + return info; + } /* Allocate memory for temporary array(s) */ v_t = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * @@ -102,36 +107,8 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, goto exit_level_2; } /* Transpose input matrices */ - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_ctr_trans( matrix_layout, 'l', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_cge_trans( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], ldv, - &v_t[k], ldv_t ); - } else if( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb_work", -8 ); - return -8; - } - LAPACKE_ctr_trans( matrix_layout, 'u', 'u', k, &v[(nrows_v-k)*ldv], - ldv, &v_t[nrows_v-k], ldv_t ); - LAPACKE_cge_trans( matrix_layout, nrows_v-k, ncols_v, v, ldv, v_t, - ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_ctr_trans( matrix_layout, 'u', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_cge_trans( matrix_layout, nrows_v, ncols_v-k, &v[k], ldv, - &v_t[k*ldv_t], ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb_work", -8 ); - return -8; - } - LAPACKE_ctr_trans( matrix_layout, 'l', 'u', k, &v[ncols_v-k], ldv, - &v_t[(ncols_v-k)*ldv_t], ldv_t ); - LAPACKE_cge_trans( matrix_layout, nrows_v, ncols_v-k, v, ldv, v_t, - ldv_t ); - } + LAPACKE_ctz_trans( matrix_layout, direct, uplo, 'u', nrows_v, ncols_v, + v, ldv, v_t, ldv_t ); LAPACKE_cge_trans( matrix_layout, k, k, t, ldt, t_t, ldt_t ); LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ diff --git a/LAPACKE/src/lapacke_dlarfb_work.c b/LAPACKE/src/lapacke_dlarfb_work.c index 57c53bae31..1a68bf7624 100644 --- a/LAPACKE/src/lapacke_dlarfb_work.c +++ b/LAPACKE/src/lapacke_dlarfb_work.c @@ -41,6 +41,8 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, { lapack_int info = 0; lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; lapack_int ldc_t, ldt_t, ldv_t; double *v_t = NULL, *t_t = NULL, *c_t = NULL; if( matrix_layout == LAPACK_COL_MAJOR ) { @@ -51,16 +53,14 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); + + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + ldc_t = MAX(1,m); ldt_t = MAX(1,k); ldv_t = MAX(1,nrows_v); @@ -80,6 +80,11 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); return info; } + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + info = -8; + LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); + return info; + } /* Allocate memory for temporary array(s) */ v_t = (double*) LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,ncols_v) ); @@ -98,36 +103,8 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, goto exit_level_2; } /* Transpose input matrices */ - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_dtr_trans( matrix_layout, 'l', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_dge_trans( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], ldv, - &v_t[k], ldv_t ); - } else if( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb_work", -8 ); - return -8; - } - LAPACKE_dtr_trans( matrix_layout, 'u', 'u', k, &v[(nrows_v-k)*ldv], - ldv, &v_t[nrows_v-k], ldv_t ); - LAPACKE_dge_trans( matrix_layout, nrows_v-k, ncols_v, v, ldv, v_t, - ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_dtr_trans( matrix_layout, 'u', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_dge_trans( matrix_layout, nrows_v, ncols_v-k, &v[k], ldv, - &v_t[k*ldv_t], ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb_work", -8 ); - return -8; - } - LAPACKE_dtr_trans( matrix_layout, 'l', 'u', k, &v[ncols_v-k], ldv, - &v_t[(ncols_v-k)*ldv_t], ldv_t ); - LAPACKE_dge_trans( matrix_layout, nrows_v, ncols_v-k, v, ldv, v_t, - ldv_t ); - } + LAPACKE_dtz_trans( matrix_layout, direct, uplo, 'u', nrows_v, ncols_v, + v, ldv, v_t, ldv_t ); LAPACKE_dge_trans( matrix_layout, k, k, t, ldt, t_t, ldt_t ); LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ diff --git a/LAPACKE/src/lapacke_slarfb_work.c b/LAPACKE/src/lapacke_slarfb_work.c index 2f5d616767..d805a947ae 100644 --- a/LAPACKE/src/lapacke_slarfb_work.c +++ b/LAPACKE/src/lapacke_slarfb_work.c @@ -41,6 +41,8 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, { lapack_int info = 0; lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; lapack_int ldc_t, ldt_t, ldv_t; float *v_t = NULL, *t_t = NULL, *c_t = NULL; if( matrix_layout == LAPACK_COL_MAJOR ) { @@ -51,16 +53,14 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); + + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + ldc_t = MAX(1,m); ldt_t = MAX(1,k); ldv_t = MAX(1,nrows_v); @@ -80,6 +80,11 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); return info; } + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + info = -8; + LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); + return info; + } /* Allocate memory for temporary array(s) */ v_t = (float*)LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,ncols_v) ); if( v_t == NULL ) { @@ -97,36 +102,8 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, goto exit_level_2; } /* Transpose input matrices */ - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_str_trans( matrix_layout, 'l', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_sge_trans( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], ldv, - &v_t[k], ldv_t ); - } else if( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb_work", -8 ); - return -8; - } - LAPACKE_str_trans( matrix_layout, 'u', 'u', k, &v[(nrows_v-k)*ldv], - ldv, &v_t[nrows_v-k], ldv_t ); - LAPACKE_sge_trans( matrix_layout, nrows_v-k, ncols_v, v, ldv, v_t, - ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_str_trans( matrix_layout, 'u', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_sge_trans( matrix_layout, nrows_v, ncols_v-k, &v[k], ldv, - &v_t[k*ldv_t], ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb_work", -8 ); - return -8; - } - LAPACKE_str_trans( matrix_layout, 'l', 'u', k, &v[ncols_v-k], ldv, - &v_t[(ncols_v-k)*ldv_t], ldv_t ); - LAPACKE_sge_trans( matrix_layout, nrows_v, ncols_v-k, v, ldv, v_t, - ldv_t ); - } + LAPACKE_stz_trans( matrix_layout, direct, uplo, 'u', nrows_v, ncols_v, + v, ldv, v_t, ldv_t ); LAPACKE_sge_trans( matrix_layout, k, k, t, ldt, t_t, ldt_t ); LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ diff --git a/LAPACKE/src/lapacke_zlarfb_work.c b/LAPACKE/src/lapacke_zlarfb_work.c index 1b4f892a17..64eb05263a 100644 --- a/LAPACKE/src/lapacke_zlarfb_work.c +++ b/LAPACKE/src/lapacke_zlarfb_work.c @@ -42,6 +42,8 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, { lapack_int info = 0; lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; lapack_int ldc_t, ldt_t, ldv_t; lapack_complex_double *v_t = NULL, *t_t = NULL, *c_t = NULL; if( matrix_layout == LAPACK_COL_MAJOR ) { @@ -52,16 +54,14 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); + + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + ldc_t = MAX(1,m); ldt_t = MAX(1,k); ldv_t = MAX(1,nrows_v); @@ -81,6 +81,11 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); return info; } + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); + return info; + } /* Allocate memory for temporary array(s) */ v_t = (lapack_complex_double*) LAPACKE_malloc( sizeof(lapack_complex_double) * @@ -102,36 +107,8 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, goto exit_level_2; } /* Transpose input matrices */ - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_ztr_trans( matrix_layout, 'l', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_zge_trans( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], ldv, - &v_t[k], ldv_t ); - } else if( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb_work", -8 ); - return -8; - } - LAPACKE_ztr_trans( matrix_layout, 'u', 'u', k, &v[(nrows_v-k)*ldv], - ldv, &v_t[nrows_v-k], ldv_t ); - LAPACKE_zge_trans( matrix_layout, nrows_v-k, ncols_v, v, ldv, v_t, - ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_ztr_trans( matrix_layout, 'u', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_zge_trans( matrix_layout, nrows_v, ncols_v-k, &v[k], ldv, - &v_t[k*ldv_t], ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb_work", -8 ); - return -8; - } - LAPACKE_ztr_trans( matrix_layout, 'l', 'u', k, &v[ncols_v-k], ldv, - &v_t[(ncols_v-k)*ldv_t], ldv_t ); - LAPACKE_zge_trans( matrix_layout, nrows_v, ncols_v-k, v, ldv, v_t, - ldv_t ); - } + LAPACKE_ztz_trans( matrix_layout, direct, uplo, 'u', nrows_v, ncols_v, + v, ldv, v_t, ldv_t ); LAPACKE_zge_trans( matrix_layout, k, k, t, ldt, t_t, ldt_t ); LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ From de9eb94acfdb091d98afc5a763a7fd41b8014689 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 23:46:55 +0100 Subject: [PATCH 43/90] Updated lapacke_utils header --- LAPACKE/include/lapacke_utils.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/LAPACKE/include/lapacke_utils.h b/LAPACKE/include/lapacke_utils.h index 9fe2ae7355..332a5024fb 100644 --- a/LAPACKE/include/lapacke_utils.h +++ b/LAPACKE/include/lapacke_utils.h @@ -128,6 +128,10 @@ void LAPACKE_ctp_trans( int matrix_layout, char uplo, char diag, void LAPACKE_ctr_trans( int matrix_layout, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); +void LAPACKE_ctz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float *in, lapack_int ldin, + lapack_complex_float *out, lapack_int ldout ); void LAPACKE_dgb_trans( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, @@ -178,6 +182,10 @@ void LAPACKE_dtp_trans( int matrix_layout, char uplo, char diag, void LAPACKE_dtr_trans( int matrix_layout, char uplo, char diag, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); +void LAPACKE_dtz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const double *in, lapack_int ldin, + double *out, lapack_int ldout ); void LAPACKE_sgb_trans( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, @@ -228,6 +236,10 @@ void LAPACKE_stp_trans( int matrix_layout, char uplo, char diag, void LAPACKE_str_trans( int matrix_layout, char uplo, char diag, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); +void LAPACKE_stz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const float *in, lapack_int ldin, + float *out, lapack_int ldout ); void LAPACKE_zgb_trans( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, @@ -284,6 +296,10 @@ void LAPACKE_ztp_trans( int matrix_layout, char uplo, char diag, void LAPACKE_ztr_trans( int matrix_layout, char uplo, char diag, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); +void LAPACKE_ztz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double *in, lapack_int ldin, + lapack_complex_double *out, lapack_int ldout ); /* NaN checkers */ #define LAPACK_SISNAN( x ) ( x != x ) From 6ef9ae99dab97cb109218d7c8819b5aea7f58a50 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 30 Oct 2022 23:47:16 +0100 Subject: [PATCH 44/90] Updated build system. --- LAPACKE/utils/CMakeLists.txt | 1 + LAPACKE/utils/Makefile | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/LAPACKE/utils/CMakeLists.txt b/LAPACKE/utils/CMakeLists.txt index 97ddfe5656..dfb9aa3702 100644 --- a/LAPACKE/utils/CMakeLists.txt +++ b/LAPACKE/utils/CMakeLists.txt @@ -38,6 +38,7 @@ lapacke_ctp_trans.c lapacke_dtp_trans.c lapacke_stp_trans.c lapacke_ctr_nancheck.c lapacke_dtr_nancheck.c lapacke_str_nancheck.c lapacke_ztr_nancheck.c lapacke_ctr_trans.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztr_trans.c lapacke_ctz_nancheck.c lapacke_dtz_nancheck.c lapacke_stz_nancheck.c lapacke_ztz_nancheck.c +lapacke_ctz_trans.c lapacke_dtz_trans.c lapacke_stz_trans.c lapacke_ztz_trans.c lapacke_make_complex_float.c lapacke_make_complex_double.c lapacke_lsame.c diff --git a/LAPACKE/utils/Makefile b/LAPACKE/utils/Makefile index d8f4dac96e..a1f8631071 100644 --- a/LAPACKE/utils/Makefile +++ b/LAPACKE/utils/Makefile @@ -77,6 +77,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_ctr_nancheck.o \ lapacke_ctr_trans.o \ lapacke_ctz_nancheck.o \ + lapacke_ctz_trans.o \ lapacke_dgb_nancheck.o \ lapacke_dgb_trans.o \ lapacke_dge_nancheck.o \ @@ -112,6 +113,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_dtr_nancheck.o \ lapacke_dtr_trans.o \ lapacke_dtz_nancheck.o \ + lapacke_dtz_trans.o \ lapacke_lsame.o \ lapacke_sgb_nancheck.o \ lapacke_sgb_trans.o \ @@ -148,6 +150,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_str_nancheck.o \ lapacke_str_trans.o \ lapacke_stz_nancheck.o \ + lapacke_stz_trans.o \ lapacke_xerbla.o \ lapacke_zgb_nancheck.o \ lapacke_zgb_trans.o \ @@ -188,6 +191,7 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_ztr_nancheck.o \ lapacke_ztr_trans.o \ lapacke_ztz_nancheck.o \ + lapacke_ztz_trans.o \ lapacke_make_complex_float.o \ lapacke_make_complex_double.o From 80392de422088ed6785f109b949144612b6aa00e Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Mon, 31 Oct 2022 00:01:47 +0100 Subject: [PATCH 45/90] Added missing declaration for LAPACKE_ctrsyl3_work. --- LAPACKE/include/lapacke.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/LAPACKE/include/lapacke.h b/LAPACKE/include/lapacke.h index 29d5943a32..9998b15047 100644 --- a/LAPACKE/include/lapacke.h +++ b/LAPACKE/include/lapacke.h @@ -10233,6 +10233,13 @@ lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb, double* c, lapack_int ldc, double* scale, lapack_int* iwork, lapack_int liwork, double* swork, lapack_int ldswork ); +lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* scale, float* swork, + lapack_int ldswork ); lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, From 48377cdf6f84d996b4eabfea40417ea314e018d0 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Mon, 31 Oct 2022 00:29:43 +0100 Subject: [PATCH 46/90] Added missing iwork query. --- LAPACKE/src/lapacke_dtrsyl3.c | 6 ++++-- LAPACKE/src/lapacke_strsyl3.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/LAPACKE/src/lapacke_dtrsyl3.c b/LAPACKE/src/lapacke_dtrsyl3.c index 523235c93a..c95a772deb 100644 --- a/LAPACKE/src/lapacke_dtrsyl3.c +++ b/LAPACKE/src/lapacke_dtrsyl3.c @@ -11,6 +11,7 @@ lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb, double* swork = NULL; lapack_int ldswork = -1; lapack_int swork_size = -1; + lapack_int iwork_query; lapack_int* iwork = NULL; lapack_int liwork = -1; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -33,7 +34,7 @@ lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb, #endif /* Query optimal working array sizes */ info = LAPACKE_dtrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, - b, ldb, c, ldc, scale, iwork, liwork, + b, ldb, c, ldc, scale, &iwork_query, liwork, swork_query, ldswork ); if( info != 0 ) { goto exit_level_0; @@ -45,8 +46,9 @@ lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb, info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } + liwork = iwork_query; iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if (iwork == NULL ) { + if ( iwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; } diff --git a/LAPACKE/src/lapacke_strsyl3.c b/LAPACKE/src/lapacke_strsyl3.c index 6db54f21f5..1cfc626c22 100644 --- a/LAPACKE/src/lapacke_strsyl3.c +++ b/LAPACKE/src/lapacke_strsyl3.c @@ -11,6 +11,7 @@ lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb, float* swork = NULL; lapack_int ldswork = -1; lapack_int swork_size = -1; + lapack_int iwork_query; lapack_int* iwork = NULL; lapack_int liwork = -1; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -33,7 +34,7 @@ lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb, #endif /* Query optimal working array sizes */ info = LAPACKE_strsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, - b, ldb, c, ldc, scale, iwork, liwork, + b, ldb, c, ldc, scale, &iwork_query, liwork, swork_query, ldswork ); if( info != 0 ) { goto exit_level_0; @@ -45,8 +46,9 @@ lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb, info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } + liwork = iwork_query; iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if (iwork == NULL ) { + if ( iwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; } From f8b6b40498c026618bd461a3d5a156b5d5efa539 Mon Sep 17 00:00:00 2001 From: Weslley da Silva Pereira Date: Mon, 7 Nov 2022 15:48:55 -0700 Subject: [PATCH 47/90] Fix wrong documentation as reported in #733 --- SRC/clarscl2.f | 10 +++++----- SRC/clascl2.f | 12 ++++++------ SRC/dlarscl2.f | 10 +++++----- SRC/dlascl2.f | 10 +++++----- SRC/slarscl2.f | 10 +++++----- SRC/slascl2.f | 10 +++++----- SRC/zlarscl2.f | 10 +++++----- SRC/zlascl2.f | 10 +++++----- 8 files changed, 41 insertions(+), 41 deletions(-) diff --git a/SRC/clarscl2.f b/SRC/clarscl2.f index 26b028dbba..f4e68523b2 100644 --- a/SRC/clarscl2.f +++ b/SRC/clarscl2.f @@ -1,4 +1,4 @@ -*> \brief \b CLARSCL2 performs reciprocal diagonal scaling on a vector. +*> \brief \b CLARSCL2 performs reciprocal diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -34,7 +34,7 @@ *> *> \verbatim *> -*> CLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> CLARSCL2 performs a reciprocal diagonal scaling on a matrix: *> x <-- inv(D) * x *> where the REAL diagonal matrix D is stored as a vector. *> @@ -66,14 +66,14 @@ *> \param[in,out] X *> \verbatim *> X is COMPLEX array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/clascl2.f b/SRC/clascl2.f index 2ae27975c5..882273b5e2 100644 --- a/SRC/clascl2.f +++ b/SRC/clascl2.f @@ -1,4 +1,4 @@ -*> \brief \b CLASCL2 performs diagonal scaling on a vector. +*> \brief \b CLASCL2 performs diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -34,9 +34,9 @@ *> *> \verbatim *> -*> CLASCL2 performs a diagonal scaling on a vector: +*> CLASCL2 performs a diagonal scaling on a matrix: *> x <-- D * x -*> where the diagonal REAL matrix D is stored as a vector. +*> where the diagonal REAL matrix D is stored as a matrix. *> *> Eventually to be replaced by BLAS_cge_diag_scale in the new BLAS *> standard. @@ -66,14 +66,14 @@ *> \param[in,out] X *> \verbatim *> X is COMPLEX array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/dlarscl2.f b/SRC/dlarscl2.f index 2468e2702d..cc4b9aa3c1 100644 --- a/SRC/dlarscl2.f +++ b/SRC/dlarscl2.f @@ -1,4 +1,4 @@ -*> \brief \b DLARSCL2 performs reciprocal diagonal scaling on a vector. +*> \brief \b DLARSCL2 performs reciprocal diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -33,7 +33,7 @@ *> *> \verbatim *> -*> DLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> DLARSCL2 performs a reciprocal diagonal scaling on a matrix: *> x <-- inv(D) * x *> where the diagonal matrix D is stored as a vector. *> @@ -65,14 +65,14 @@ *> \param[in,out] X *> \verbatim *> X is DOUBLE PRECISION array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/dlascl2.f b/SRC/dlascl2.f index 901e43c494..568e296ad0 100644 --- a/SRC/dlascl2.f +++ b/SRC/dlascl2.f @@ -1,4 +1,4 @@ -*> \brief \b DLASCL2 performs diagonal scaling on a vector. +*> \brief \b DLASCL2 performs diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -33,7 +33,7 @@ *> *> \verbatim *> -*> DLASCL2 performs a diagonal scaling on a vector: +*> DLASCL2 performs a diagonal scaling on a matrix: *> x <-- D * x *> where the diagonal matrix D is stored as a vector. *> @@ -65,14 +65,14 @@ *> \param[in,out] X *> \verbatim *> X is DOUBLE PRECISION array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/slarscl2.f b/SRC/slarscl2.f index 5726f12cd5..c7b77c9083 100644 --- a/SRC/slarscl2.f +++ b/SRC/slarscl2.f @@ -1,4 +1,4 @@ -*> \brief \b SLARSCL2 performs reciprocal diagonal scaling on a vector. +*> \brief \b SLARSCL2 performs reciprocal diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -33,7 +33,7 @@ *> *> \verbatim *> -*> SLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> SLARSCL2 performs a reciprocal diagonal scaling on a matrix: *> x <-- inv(D) * x *> where the diagonal matrix D is stored as a vector. *> @@ -65,14 +65,14 @@ *> \param[in,out] X *> \verbatim *> X is REAL array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/slascl2.f b/SRC/slascl2.f index 07b506a8c1..5efc1cfcd2 100644 --- a/SRC/slascl2.f +++ b/SRC/slascl2.f @@ -1,4 +1,4 @@ -*> \brief \b SLASCL2 performs diagonal scaling on a vector. +*> \brief \b SLASCL2 performs diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -33,7 +33,7 @@ *> *> \verbatim *> -*> SLASCL2 performs a diagonal scaling on a vector: +*> SLASCL2 performs a diagonal scaling on a matrix: *> x <-- D * x *> where the diagonal matrix D is stored as a vector. *> @@ -65,14 +65,14 @@ *> \param[in,out] X *> \verbatim *> X is REAL array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/zlarscl2.f b/SRC/zlarscl2.f index 4a1e1603a4..e618659067 100644 --- a/SRC/zlarscl2.f +++ b/SRC/zlarscl2.f @@ -1,4 +1,4 @@ -*> \brief \b ZLARSCL2 performs reciprocal diagonal scaling on a vector. +*> \brief \b ZLARSCL2 performs reciprocal diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -34,7 +34,7 @@ *> *> \verbatim *> -*> ZLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> ZLARSCL2 performs a reciprocal diagonal scaling on a matrix: *> x <-- inv(D) * x *> where the DOUBLE PRECISION diagonal matrix D is stored as a vector. *> @@ -66,14 +66,14 @@ *> \param[in,out] X *> \verbatim *> X is COMPLEX*16 array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/zlascl2.f b/SRC/zlascl2.f index c4e6992fbe..26406c3636 100644 --- a/SRC/zlascl2.f +++ b/SRC/zlascl2.f @@ -1,4 +1,4 @@ -*> \brief \b ZLASCL2 performs diagonal scaling on a vector. +*> \brief \b ZLASCL2 performs diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -34,7 +34,7 @@ *> *> \verbatim *> -*> ZLASCL2 performs a diagonal scaling on a vector: +*> ZLASCL2 performs a diagonal scaling on a matrix: *> x <-- D * x *> where the DOUBLE PRECISION diagonal matrix D is stored as a vector. *> @@ -66,14 +66,14 @@ *> \param[in,out] X *> \verbatim *> X is COMPLEX*16 array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: From 211fbbed0e8e1d5f10e0afb6abddc5a0b5a04685 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 13:22:32 +0100 Subject: [PATCH 48/90] allow conda updates again --- .appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 2996701e01..3b5c3325b1 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -24,8 +24,8 @@ install: - conda config --add channels conda-forge --force - conda clean --all --yes # - conda install --yes conda=4.3.8 --force - - conda install --yes -c conda-forge charset-normalizer=2.0.5 --force -# - conda update --yes -n base conda +# - conda install --yes -c conda-forge charset-normalizer=2.0.5 --force + - conda update --yes -n base conda - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" From f076f7482b336198a1f94276a92865d1edd22d04 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 14:14:42 +0100 Subject: [PATCH 49/90] reshuffle conda commands --- .appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 3b5c3325b1..ada848e087 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -21,11 +21,11 @@ environment: install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false - - conda config --add channels conda-forge --force - - conda clean --all --yes # - conda install --yes conda=4.3.8 --force # - conda install --yes -c conda-forge charset-normalizer=2.0.5 --force - conda update --yes -n base conda + - conda config --add channels conda-forge --force + - conda clean --all --yes - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" From 6fa0844c310c017e09ce737731578c2490cfb13c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 14:33:37 +0100 Subject: [PATCH 50/90] Update .appveyor.yml --- .appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index ada848e087..5ae94cc812 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -23,9 +23,9 @@ install: # - conda config --set auto_update_conda false # - conda install --yes conda=4.3.8 --force # - conda install --yes -c conda-forge charset-normalizer=2.0.5 --force - - conda update --yes -n base conda +# - conda update --yes -n base conda - conda config --add channels conda-forge --force - - conda clean --all --yes +# - conda clean --all --yes - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" From 4bf5eb723cef1f81502f215cb309c98fbfe4d21d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 16:03:37 +0100 Subject: [PATCH 51/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 5ae94cc812..6efad7cc98 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -24,7 +24,7 @@ install: # - conda install --yes conda=4.3.8 --force # - conda install --yes -c conda-forge charset-normalizer=2.0.5 --force # - conda update --yes -n base conda - - conda config --add channels conda-forge --force +# - conda config --add channels conda-forge --force # - conda clean --all --yes - conda install --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 From ab12b8590950390779da03357ee55a5e6df1f9dc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 17:09:50 +0100 Subject: [PATCH 52/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 6efad7cc98..d576709938 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -26,7 +26,7 @@ install: # - conda update --yes -n base conda # - conda config --add channels conda-forge --force # - conda clean --all --yes - - conda install --yes --quiet flang jom + - conda install -c conda-forge --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" From 70373e1a4c8fc5325befc6215d17bd3e5baa6d7e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 17:24:27 +0100 Subject: [PATCH 53/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index d576709938..11bf35498b 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -26,7 +26,7 @@ install: # - conda update --yes -n base conda # - conda config --add channels conda-forge --force # - conda clean --all --yes - - conda install -c conda-forge --yes --quiet flang jom + - conda install -c conda-forge --yes --quiet flang - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" From ac7d65548439b23ed6ef3e4a2c21166a8e5d3ecc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 18:53:46 +0100 Subject: [PATCH 54/90] Update .appveyor.yml --- .appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.appveyor.yml b/.appveyor.yml index 11bf35498b..0490b02574 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -19,6 +19,7 @@ environment: CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 install: + - conda update --yes conda - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false # - conda install --yes conda=4.3.8 --force From 798ab5f8c3179b753ff024591e31acc054578162 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 21:54:38 +0100 Subject: [PATCH 55/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 0490b02574..cec157de00 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -27,7 +27,7 @@ install: # - conda update --yes -n base conda # - conda config --add channels conda-forge --force # - conda clean --all --yes - - conda install -c conda-forge --yes --quiet flang + - %CONDA_INSTALL_LOCN%\conda install -c conda-forge --yes --quiet flang - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" From f7fa63f973568290315e7bfb7864b05ab3e5b245 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 22:06:05 +0100 Subject: [PATCH 56/90] Update .appveyor.yml --- .appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index cec157de00..b38e31397c 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -19,7 +19,7 @@ environment: CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 install: - - conda update --yes conda + - call %CONDA_INSTALL_LOCN%\conda update --yes conda - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false # - conda install --yes conda=4.3.8 --force @@ -27,7 +27,7 @@ install: # - conda update --yes -n base conda # - conda config --add channels conda-forge --force # - conda clean --all --yes - - %CONDA_INSTALL_LOCN%\conda install -c conda-forge --yes --quiet flang + - conda install -c conda-forge --yes --quiet flang - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" From a790c4dbe5ff554179c51f9d23996884722c5000 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 8 Nov 2022 22:23:59 +0100 Subject: [PATCH 57/90] Update .appveyor.yml --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index b38e31397c..f2a079ec4d 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -19,7 +19,7 @@ environment: CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 install: - - call %CONDA_INSTALL_LOCN%\conda update --yes conda + - call %CONDA_INSTALL_LOCN%\Scripts\conda update --yes conda - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false # - conda install --yes conda=4.3.8 --force From 4c8b76da4e8a0652cfef9ae41a6422a8c81e9913 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 9 Nov 2022 11:22:53 +0100 Subject: [PATCH 58/90] use newer miniconda --- .appveyor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index f2a079ec4d..7e2d75383b 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -16,10 +16,10 @@ cache: environment: global: - CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 + CONDA_INSTALL_LOCN: C:\\Miniconda37-x64 install: - - call %CONDA_INSTALL_LOCN%\Scripts\conda update --yes conda +# - call %CONDA_INSTALL_LOCN%\Scripts\conda update --yes conda - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false # - conda install --yes conda=4.3.8 --force @@ -27,7 +27,7 @@ install: # - conda update --yes -n base conda # - conda config --add channels conda-forge --force # - conda clean --all --yes - - conda install -c conda-forge --yes --quiet flang + - conda install -c conda-forge --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" From b095fc6b5bad1e4298ba5d0d1b635fed5b867fe6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 9 Nov 2022 11:37:01 +0100 Subject: [PATCH 59/90] temporarily disable build cache --- .appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 7e2d75383b..3140554e2b 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -11,8 +11,8 @@ skip_commits: # Add [av skip] to commit messages message: /\[av skip\]/ -cache: - - '%APPVEYOR_BUILD_FOLDER%\build' +#cache: +# - '%APPVEYOR_BUILD_FOLDER%\build' environment: global: From 4911ca48b159e37ce151e4dd2c117dd74bdc1c92 Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Wed, 9 Nov 2022 10:00:46 -0700 Subject: [PATCH 60/90] Update .appveyor.yml Removing comments. Keep the one commented line that disabled automatic updates of conda itself. --- .appveyor.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 3140554e2b..f5c21313d9 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -11,22 +11,13 @@ skip_commits: # Add [av skip] to commit messages message: /\[av skip\]/ -#cache: -# - '%APPVEYOR_BUILD_FOLDER%\build' - environment: global: CONDA_INSTALL_LOCN: C:\\Miniconda37-x64 install: -# - call %CONDA_INSTALL_LOCN%\Scripts\conda update --yes conda - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat # - conda config --set auto_update_conda false -# - conda install --yes conda=4.3.8 --force -# - conda install --yes -c conda-forge charset-normalizer=2.0.5 --force -# - conda update --yes -n base conda -# - conda config --add channels conda-forge --force -# - conda clean --all --yes - conda install -c conda-forge --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" From d5203e94d5ea36b0fcce4f70bcc4a0be3b1fa325 Mon Sep 17 00:00:00 2001 From: scr2016 Date: Thu, 10 Nov 2022 07:00:47 -0800 Subject: [PATCH 61/90] added missing testing for error exits for ZGELST routine, i.e. modified TESTING/LIN/zerrls.f accordingly. Also fixed comments typo in [S,D,C,Z]GELST. modified: SRC/cgelst.f modified: SRC/dgelst.f modified: SRC/sgelst.f modified: SRC/zgelst.f modified: TESTING/LIN/zerrls.f --- SRC/cgelst.f | 2 +- SRC/dgelst.f | 2 +- SRC/sgelst.f | 2 +- SRC/zgelst.f | 2 +- TESTING/LIN/zerrls.f | 61 ++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 63 insertions(+), 6 deletions(-) diff --git a/SRC/cgelst.f b/SRC/cgelst.f index 26dda1c0ca..7d8e44ddf2 100644 --- a/SRC/cgelst.f +++ b/SRC/cgelst.f @@ -1,4 +1,4 @@ -*> \brief CGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representaion of Q. +*> \brief CGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. * * =========== DOCUMENTATION =========== * diff --git a/SRC/dgelst.f b/SRC/dgelst.f index 80957d5113..ca0e04a9b8 100644 --- a/SRC/dgelst.f +++ b/SRC/dgelst.f @@ -1,4 +1,4 @@ -*> \brief DGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representaion of Q. +*> \brief DGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. * * =========== DOCUMENTATION =========== * diff --git a/SRC/sgelst.f b/SRC/sgelst.f index 5e27e37e38..5377bc720a 100644 --- a/SRC/sgelst.f +++ b/SRC/sgelst.f @@ -1,4 +1,4 @@ -*> \brief SGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representaion of Q. +*> \brief SGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. * * =========== DOCUMENTATION =========== * diff --git a/SRC/zgelst.f b/SRC/zgelst.f index 35b73dc454..4dabdc91e6 100644 --- a/SRC/zgelst.f +++ b/SRC/zgelst.f @@ -1,4 +1,4 @@ -*> \brief ZGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representaion of Q. +*> \brief ZGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. * * =========== DOCUMENTATION =========== * diff --git a/TESTING/LIN/zerrls.f b/TESTING/LIN/zerrls.f index 66e56c8c6b..22f049ee06 100644 --- a/TESTING/LIN/zerrls.f +++ b/TESTING/LIN/zerrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> ZERRLS tests the error exits for the COMPLEX*16 least squares -*> driver routines (ZGELS, CGELSS, CGELSY, CGELSD). +*> driver routines (ZGELS, ZGELST, ZGETSLS, CGELSS, CGELSY, CGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ SUBROUTINE ZERRLS( PATH, NUNIT ) EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, ZGELS, ZGELSD, ZGELSS, ZGELSY + EXTERNAL ALAESM, CHKXER, ZGELS, ZGELSD, ZGELSS, ZGELST, + $ ZGELSY, ZGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ SUBROUTINE ZERRLS( PATH, NUNIT ) INFOT = 8 CALL ZGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'ZGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'ZGELS ', INFOT, NOUT, LERR, OK ) * +* ZGELST +* + SRNAMT = 'ZGELST' + INFOT = 1 + CALL ZGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) +* +* ZGETSLS +* + SRNAMT = 'ZGETSLS' + INFOT = 1 + CALL ZGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) +* * ZGELSS * SRNAMT = 'ZGELSS' From 9300418fe417915817e5eab9b63fc919b4d8b2d3 Mon Sep 17 00:00:00 2001 From: Weslley da Silva Pereira Date: Thu, 10 Nov 2022 10:29:22 -0700 Subject: [PATCH 62/90] Change README and CMakeLists to LAPACK 3.11 --- CMakeLists.txt | 4 ++-- README.md | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 41b3c138cf..68a7767b8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 3.2) project(LAPACK Fortran C) set(LAPACK_MAJOR_VERSION 3) -set(LAPACK_MINOR_VERSION 10) -set(LAPACK_PATCH_VERSION 1) +set(LAPACK_MINOR_VERSION 11) +set(LAPACK_PATCH_VERSION 0) set( LAPACK_VERSION ${LAPACK_MAJOR_VERSION}.${LAPACK_MINOR_VERSION}.${LAPACK_PATCH_VERSION} diff --git a/README.md b/README.md index c255cc2de7..142aa7b720 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ * VERSION 3.9.1 : April 2021 * VERSION 3.10.0 : June 2021 * VERSION 3.10.1 : April 2022 +* VERSION 3.11.0 : November 2022 LAPACK is a library of Fortran subroutines for solving the most commonly occurring problems in numerical linear algebra. From 57f15d84c170451502c5d623628a9b2194a47259 Mon Sep 17 00:00:00 2001 From: Weslley da Silva Pereira Date: Thu, 10 Nov 2022 10:40:48 -0700 Subject: [PATCH 63/90] Change ILAVER and Doxyfile accordingly --- DOCS/Doxyfile | 2 +- DOCS/Doxyfile_man | 2 +- INSTALL/ilaver.f | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DOCS/Doxyfile b/DOCS/Doxyfile index 4229769c98..97415241c4 100644 --- a/DOCS/Doxyfile +++ b/DOCS/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 3.10.1 +PROJECT_NUMBER = 3.11.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/DOCS/Doxyfile_man b/DOCS/Doxyfile_man index 6a64156fba..6625de4566 100644 --- a/DOCS/Doxyfile_man +++ b/DOCS/Doxyfile_man @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 3.10.1 +PROJECT_NUMBER = 3.11.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/INSTALL/ilaver.f b/INSTALL/ilaver.f index fd179ceb81..7d55e13e9d 100644 --- a/INSTALL/ilaver.f +++ b/INSTALL/ilaver.f @@ -58,8 +58,8 @@ SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH * ===================================================================== VERS_MAJOR = 3 - VERS_MINOR = 10 - VERS_PATCH = 1 + VERS_MINOR = 11 + VERS_PATCH = 0 * ===================================================================== * RETURN From 7f9f67572a8e3eafa01939caaed6f778aa2f8f78 Mon Sep 17 00:00:00 2001 From: Dmitry Klyuchinsky Date: Thu, 17 Nov 2022 19:06:00 +0700 Subject: [PATCH 64/90] fix wrong indexation in slatrs3 test --- TESTING/LIN/schktr.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TESTING/LIN/schktr.f b/TESTING/LIN/schktr.f index 5aeb1ce88c..33f07726ec 100644 --- a/TESTING/LIN/schktr.f +++ b/TESTING/LIN/schktr.f @@ -559,7 +559,7 @@ SUBROUTINE SCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) * CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, - $ SCALE3 ( 1 ), RWORK, ONE, B( N+1 ), LDA, + $ SCALE3 ( 1 ), RWORK, ONE, B( 1 ), LDA, $ X, LDA, WORK, RESULT( 10 ) ) CALL SSCAL( N, BIGNUM, X, 1 ) CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, From a4dd9261efd5647488ad8c4e09790b2c46280754 Mon Sep 17 00:00:00 2001 From: Dmitry Klyuchinsky Date: Thu, 17 Nov 2022 19:10:08 +0700 Subject: [PATCH 65/90] use lsame instead EQ in iparam2stage --- SRC/iparam2stage.F | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/SRC/iparam2stage.F b/SRC/iparam2stage.F index c153eef22b..c701c2be08 100644 --- a/SRC/iparam2stage.F +++ b/SRC/iparam2stage.F @@ -178,7 +178,8 @@ INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + LOGICAL LSAME + EXTERNAL ILAENV, LSAME * .. * .. Executable Statements .. * @@ -310,7 +311,7 @@ INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, * * Will add the VECT OPTION HERE next release VECT = OPTS(1:1) - IF( VECT.EQ.'N' ) THEN + IF( LSAME( VECT, 'N' ) ) THEN LHOUS = MAX( 1, 4*NI ) ELSE * This is not correct, it need to call the ALGO and the stage2 From 9b5e2c50e36fbd8728155e15c8924b7fcaf88900 Mon Sep 17 00:00:00 2001 From: Dmitry Klyuchinsky Date: Fri, 18 Nov 2022 17:58:06 +0700 Subject: [PATCH 66/90] fixed bug in array bounds in complex syl01 test --- TESTING/EIG/csyl01.f | 2 +- TESTING/EIG/zsyl01.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/TESTING/EIG/csyl01.f b/TESTING/EIG/csyl01.f index e21f1a7a03..82d790daa5 100644 --- a/TESTING/EIG/csyl01.f +++ b/TESTING/EIG/csyl01.f @@ -124,7 +124,7 @@ SUBROUTINE CSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) $ C( MAXM, MAXN ), CC( MAXM, MAXN ), $ X( MAXM, MAXN ), $ DUML( MAXM ), DUMR( MAXN ), - $ D( MIN( MAXM, MAXN ) ) + $ D( MAX( MAXM, MAXN ) ) REAL SWORK( LDSWORK, 54 ), DUM( MAXN ), VM( 2 ) INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ) * .. diff --git a/TESTING/EIG/zsyl01.f b/TESTING/EIG/zsyl01.f index 1e8619a34c..329f39dc4f 100644 --- a/TESTING/EIG/zsyl01.f +++ b/TESTING/EIG/zsyl01.f @@ -124,7 +124,7 @@ SUBROUTINE ZSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) $ C( MAXM, MAXN ), CC( MAXM, MAXN ), $ X( MAXM, MAXN ), $ DUML( MAXM ), DUMR( MAXN ), - $ D( MIN( MAXM, MAXN ) ) + $ D( MAX( MAXM, MAXN ) ) DOUBLE PRECISION SWORK( LDSWORK, 103 ), DUM( MAXN ), VM( 2 ) INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ) * .. From 846f3cb3249083980f7a56896aa60d5c73620cf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20M=C3=A4rtens?= Date: Fri, 18 Nov 2022 16:36:53 +0100 Subject: [PATCH 67/90] Removed unnecessary return statement in void function. --- LAPACKE/utils/lapacke_ctz_trans.c | 6 +++--- LAPACKE/utils/lapacke_dtz_trans.c | 6 +++--- LAPACKE/utils/lapacke_stz_trans.c | 6 +++--- LAPACKE/utils/lapacke_ztz_trans.c | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/LAPACKE/utils/lapacke_ctz_trans.c b/LAPACKE/utils/lapacke_ctz_trans.c index 8910aee7da..0abe03d283 100644 --- a/LAPACKE/utils/lapacke_ctz_trans.c +++ b/LAPACKE/utils/lapacke_ctz_trans.c @@ -147,7 +147,7 @@ void LAPACKE_ctz_trans( int matrix_layout, char direct, char uplo, } /* Copy & transpose triangular part */ - return LAPACKE_ctr_trans( matrix_layout, uplo, diag, tri_n, - &in[tri_in_offset], ldin, - &out[tri_out_offset], ldout ); + LAPACKE_ctr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); } diff --git a/LAPACKE/utils/lapacke_dtz_trans.c b/LAPACKE/utils/lapacke_dtz_trans.c index 80d94ead90..f53e03adcf 100644 --- a/LAPACKE/utils/lapacke_dtz_trans.c +++ b/LAPACKE/utils/lapacke_dtz_trans.c @@ -147,7 +147,7 @@ void LAPACKE_dtz_trans( int matrix_layout, char direct, char uplo, } /* Copy & transpose triangular part */ - return LAPACKE_dtr_trans( matrix_layout, uplo, diag, tri_n, - &in[tri_in_offset], ldin, - &out[tri_out_offset], ldout ); + LAPACKE_dtr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); } diff --git a/LAPACKE/utils/lapacke_stz_trans.c b/LAPACKE/utils/lapacke_stz_trans.c index 793f3833d4..bdb4279572 100644 --- a/LAPACKE/utils/lapacke_stz_trans.c +++ b/LAPACKE/utils/lapacke_stz_trans.c @@ -147,7 +147,7 @@ void LAPACKE_stz_trans( int matrix_layout, char direct, char uplo, } /* Copy & transpose triangular part */ - return LAPACKE_str_trans( matrix_layout, uplo, diag, tri_n, - &in[tri_in_offset], ldin, - &out[tri_out_offset], ldout ); + LAPACKE_str_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); } diff --git a/LAPACKE/utils/lapacke_ztz_trans.c b/LAPACKE/utils/lapacke_ztz_trans.c index 881052331e..fa4bb94c5f 100644 --- a/LAPACKE/utils/lapacke_ztz_trans.c +++ b/LAPACKE/utils/lapacke_ztz_trans.c @@ -147,7 +147,7 @@ void LAPACKE_ztz_trans( int matrix_layout, char direct, char uplo, } /* Copy & transpose triangular part */ - return LAPACKE_ztr_trans( matrix_layout, uplo, diag, tri_n, - &in[tri_in_offset], ldin, - &out[tri_out_offset], ldout ); + LAPACKE_ztr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); } From 273d758e4a638b084f4747e4a8d3abe335d63f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20M=C3=A4rtens?= Date: Fri, 18 Nov 2022 16:37:47 +0100 Subject: [PATCH 68/90] Fixed wrong external function name '[D|Z]GESVQ' -> '[D|Z]GESVDQ' --- TESTING/EIG/derred.f | 2 +- TESTING/EIG/zerred.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/TESTING/EIG/derred.f b/TESTING/EIG/derred.f index 6df5178253..11a9320526 100644 --- a/TESTING/EIG/derred.f +++ b/TESTING/EIG/derred.f @@ -99,7 +99,7 @@ SUBROUTINE DERRED( PATH, NUNIT ) * .. * .. External Subroutines .. EXTERNAL CHKXER, DGEES, DGEESX, DGEEV, DGEEVX, DGEJSV, - $ DGESDD, DGESVD, DGESVDX, DGESVQ + $ DGESDD, DGESVD, DGESVDX, DGESVDQ * .. * .. External Functions .. LOGICAL DSLECT, LSAMEN diff --git a/TESTING/EIG/zerred.f b/TESTING/EIG/zerred.f index d1219c02b9..1876c1f1d7 100644 --- a/TESTING/EIG/zerred.f +++ b/TESTING/EIG/zerred.f @@ -100,7 +100,7 @@ SUBROUTINE ZERRED( PATH, NUNIT ) * .. * .. External Subroutines .. EXTERNAL CHKXER, ZGEES, ZGEESX, ZGEEV, ZGEEVX, ZGESVJ, - $ ZGESDD, ZGESVD, ZGESVDX, ZGESVQ + $ ZGESDD, ZGESVD, ZGESVDX, ZGESVDQ * .. * .. External Functions .. LOGICAL LSAMEN, ZSLECT From feed6a75344bbd190e037574e35d546a784e4be3 Mon Sep 17 00:00:00 2001 From: Julien Schueller Date: Fri, 18 Nov 2022 19:23:41 +0100 Subject: [PATCH 69/90] CMake: Disable TEST_FORTRAN_COMPILER Closes #757 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 68a7767b8d..33905ed1b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,7 @@ if(_is_coverage_build) endif() # By default test Fortran compiler complex abs and complex division -option(TEST_FORTRAN_COMPILER "Test Fortran compiler complex abs and complex division" ON) +option(TEST_FORTRAN_COMPILER "Test Fortran compiler complex abs and complex division" OFF) if( TEST_FORTRAN_COMPILER ) add_executable( test_zcomplexabs ${LAPACK_SOURCE_DIR}/INSTALL/test_zcomplexabs.f ) From e7afed35fb2ee3b82f34e324953b92f63883f670 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sat, 19 Nov 2022 21:25:15 +0100 Subject: [PATCH 70/90] Fixed usage of `HAS_ATTRIBUTE_WEAK_SUPPORT`. Formatted cblas_f77.h --- CBLAS/include/cblas.h | 6 +- CBLAS/include/cblas_f77.h | 1777 +++++++++++++++++----------------- CBLAS/testing/CMakeLists.txt | 24 + 3 files changed, 920 insertions(+), 887 deletions(-) diff --git a/CBLAS/include/cblas.h b/CBLAS/include/cblas.h index f7d411571c..124baf17e8 100644 --- a/CBLAS/include/cblas.h +++ b/CBLAS/include/cblas.h @@ -608,7 +608,11 @@ void cblas_zher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const void *B, const CBLAS_INT ldb, const double beta, void *C, const CBLAS_INT ldc); -void cblas_xerbla(CBLAS_INT p, const char *rout, const char *form, ...); +void +#ifdef HAS_ATTRIBUTE_WEAK_SUPPORT +__attribute__((weak)) +#endif +cblas_xerbla(CBLAS_INT p, const char *rout, const char *form, ...); #ifdef __cplusplus } diff --git a/CBLAS/include/cblas_f77.h b/CBLAS/include/cblas_f77.h index 8d8e929873..283c50c520 100644 --- a/CBLAS/include/cblas_f77.h +++ b/CBLAS/include/cblas_f77.h @@ -47,37 +47,37 @@ */ #define F77_xerbla_base F77_GLOBAL(xerbla,XERBLA) -#define F77_srotg_base F77_GLOBAL(srotg,SROTG) +#define F77_srotg_base F77_GLOBAL(srotg,SROTG) #define F77_srotmg_base F77_GLOBAL(srotmg,SROTMG) -#define F77_srot_base F77_GLOBAL(srot,SROT) -#define F77_srotm_base F77_GLOBAL(srotm,SROTM) -#define F77_drotg_base F77_GLOBAL(drotg,DROTG) +#define F77_srot_base F77_GLOBAL(srot,SROT) +#define F77_srotm_base F77_GLOBAL(srotm,SROTM) +#define F77_drotg_base F77_GLOBAL(drotg,DROTG) #define F77_drotmg_base F77_GLOBAL(drotmg,DROTMG) -#define F77_drot_base F77_GLOBAL(drot,DROT) -#define F77_drotm_base F77_GLOBAL(drotm,DROTM) -#define F77_sswap_base F77_GLOBAL(sswap,SSWAP) -#define F77_scopy_base F77_GLOBAL(scopy,SCOPY) -#define F77_saxpy_base F77_GLOBAL(saxpy,SAXPY) +#define F77_drot_base F77_GLOBAL(drot,DROT) +#define F77_drotm_base F77_GLOBAL(drotm,DROTM) +#define F77_sswap_base F77_GLOBAL(sswap,SSWAP) +#define F77_scopy_base F77_GLOBAL(scopy,SCOPY) +#define F77_saxpy_base F77_GLOBAL(saxpy,SAXPY) #define F77_isamax_sub_base F77_GLOBAL(isamaxsub,ISAMAXSUB) -#define F77_dswap_base F77_GLOBAL(dswap,DSWAP) -#define F77_dcopy_base F77_GLOBAL(dcopy,DCOPY) -#define F77_daxpy_base F77_GLOBAL(daxpy,DAXPY) +#define F77_dswap_base F77_GLOBAL(dswap,DSWAP) +#define F77_dcopy_base F77_GLOBAL(dcopy,DCOPY) +#define F77_daxpy_base F77_GLOBAL(daxpy,DAXPY) #define F77_idamax_sub_base F77_GLOBAL(idamaxsub,IDAMAXSUB) -#define F77_cswap_base F77_GLOBAL(cswap,CSWAP) -#define F77_ccopy_base F77_GLOBAL(ccopy,CCOPY) -#define F77_caxpy_base F77_GLOBAL(caxpy,CAXPY) +#define F77_cswap_base F77_GLOBAL(cswap,CSWAP) +#define F77_ccopy_base F77_GLOBAL(ccopy,CCOPY) +#define F77_caxpy_base F77_GLOBAL(caxpy,CAXPY) #define F77_icamax_sub_base F77_GLOBAL(icamaxsub,ICAMAXSUB) -#define F77_zswap_base F77_GLOBAL(zswap,ZSWAP) -#define F77_zcopy_base F77_GLOBAL(zcopy,ZCOPY) -#define F77_zaxpy_base F77_GLOBAL(zaxpy,ZAXPY) +#define F77_zswap_base F77_GLOBAL(zswap,ZSWAP) +#define F77_zcopy_base F77_GLOBAL(zcopy,ZCOPY) +#define F77_zaxpy_base F77_GLOBAL(zaxpy,ZAXPY) #define F77_izamax_sub_base F77_GLOBAL(izamaxsub,IZAMAXSUB) -#define F77_sdot_sub_base F77_GLOBAL(sdotsub,SDOTSUB) -#define F77_ddot_sub_base F77_GLOBAL(ddotsub,DDOTSUB) +#define F77_sdot_sub_base F77_GLOBAL(sdotsub,SDOTSUB) +#define F77_ddot_sub_base F77_GLOBAL(ddotsub,DDOTSUB) #define F77_dsdot_sub_base F77_GLOBAL(dsdotsub,DSDOTSUB) -#define F77_sscal_base F77_GLOBAL(sscal,SSCAL) -#define F77_dscal_base F77_GLOBAL(dscal,DSCAL) -#define F77_cscal_base F77_GLOBAL(cscal,CSCAL) -#define F77_zscal_base F77_GLOBAL(zscal,ZSCAL) +#define F77_sscal_base F77_GLOBAL(sscal,SSCAL) +#define F77_dscal_base F77_GLOBAL(dscal,DSCAL) +#define F77_cscal_base F77_GLOBAL(cscal,CSCAL) +#define F77_zscal_base F77_GLOBAL(zscal,ZSCAL) #define F77_csscal_base F77_GLOBAL(csscal,CSSCAL) #define F77_zdscal_base F77_GLOBAL(zdscal,ZDSCAL) #define F77_cdotu_sub_base F77_GLOBAL(cdotusub,CDOTUSUB) @@ -93,115 +93,115 @@ #define F77_dznrm2_sub_base F77_GLOBAL(dznrm2sub,DZNRM2SUB) #define F77_dzasum_sub_base F77_GLOBAL(dzasumsub,DZASUMSUB) #define F77_sdsdot_sub_base F77_GLOBAL(sdsdotsub,SDSDOTSUB) -#define F77_crotg_base F77_GLOBAL(crotg, CROTG) -#define F77_csrot_base F77_GLOBAL(csrot, CSROT) -#define F77_zrotg_base F77_GLOBAL(zrotg, ZROTG) -#define F77_zdrot_base F77_GLOBAL(zdrot, ZDROT) -#define F77_scabs1_sub_base F77_GLOBAL(scabs1sub, SCABS1SUB) -#define F77_dcabs1_sub_base F77_GLOBAL(dcabs1sub, DCABS1SUB) +#define F77_crotg_base F77_GLOBAL(crotg, CROTG) +#define F77_csrot_base F77_GLOBAL(csrot, CSROT) +#define F77_zrotg_base F77_GLOBAL(zrotg, ZROTG) +#define F77_zdrot_base F77_GLOBAL(zdrot, ZDROT) +#define F77_scabs1_sub_base F77_GLOBAL(scabs1sub, SCABS1SUB) +#define F77_dcabs1_sub_base F77_GLOBAL(dcabs1sub, DCABS1SUB) /* * Level 2 BLAS */ -#define F77_ssymv_base F77_GLOBAL(ssymv,SSYMV) -#define F77_ssbmv_base F77_GLOBAL(ssbmv,SSBMV) -#define F77_sspmv_base F77_GLOBAL(sspmv,SSPMV) -#define F77_sger_base F77_GLOBAL(sger,SGER) -#define F77_ssyr_base F77_GLOBAL(ssyr,SSYR) -#define F77_sspr_base F77_GLOBAL(sspr,SSPR) -#define F77_ssyr2_base F77_GLOBAL(ssyr2,SSYR2) -#define F77_sspr2_base F77_GLOBAL(sspr2,SSPR2) -#define F77_dsymv_base F77_GLOBAL(dsymv,DSYMV) -#define F77_dsbmv_base F77_GLOBAL(dsbmv,DSBMV) -#define F77_dspmv_base F77_GLOBAL(dspmv,DSPMV) -#define F77_dger_base F77_GLOBAL(dger,DGER) -#define F77_dsyr_base F77_GLOBAL(dsyr,DSYR) -#define F77_dspr_base F77_GLOBAL(dspr,DSPR) -#define F77_dsyr2_base F77_GLOBAL(dsyr2,DSYR2) -#define F77_dspr2_base F77_GLOBAL(dspr2,DSPR2) -#define F77_chemv_base F77_GLOBAL(chemv,CHEMV) -#define F77_chbmv_base F77_GLOBAL(chbmv,CHBMV) -#define F77_chpmv_base F77_GLOBAL(chpmv,CHPMV) -#define F77_cgeru_base F77_GLOBAL(cgeru,CGERU) -#define F77_cgerc_base F77_GLOBAL(cgerc,CGERC) -#define F77_cher_base F77_GLOBAL(cher,CHER) -#define F77_chpr_base F77_GLOBAL(chpr,CHPR) -#define F77_cher2_base F77_GLOBAL(cher2,CHER2) -#define F77_chpr2_base F77_GLOBAL(chpr2,CHPR2) -#define F77_zhemv_base F77_GLOBAL(zhemv,ZHEMV) -#define F77_zhbmv_base F77_GLOBAL(zhbmv,ZHBMV) -#define F77_zhpmv_base F77_GLOBAL(zhpmv,ZHPMV) -#define F77_zgeru_base F77_GLOBAL(zgeru,ZGERU) -#define F77_zgerc_base F77_GLOBAL(zgerc,ZGERC) -#define F77_zher_base F77_GLOBAL(zher,ZHER) -#define F77_zhpr_base F77_GLOBAL(zhpr,ZHPR) -#define F77_zher2_base F77_GLOBAL(zher2,ZHER2) -#define F77_zhpr2_base F77_GLOBAL(zhpr2,ZHPR2) -#define F77_sgemv_base F77_GLOBAL(sgemv,SGEMV) -#define F77_sgbmv_base F77_GLOBAL(sgbmv,SGBMV) -#define F77_strmv_base F77_GLOBAL(strmv,STRMV) -#define F77_stbmv_base F77_GLOBAL(stbmv,STBMV) -#define F77_stpmv_base F77_GLOBAL(stpmv,STPMV) -#define F77_strsv_base F77_GLOBAL(strsv,STRSV) -#define F77_stbsv_base F77_GLOBAL(stbsv,STBSV) -#define F77_stpsv_base F77_GLOBAL(stpsv,STPSV) -#define F77_dgemv_base F77_GLOBAL(dgemv,DGEMV) -#define F77_dgbmv_base F77_GLOBAL(dgbmv,DGBMV) -#define F77_dtrmv_base F77_GLOBAL(dtrmv,DTRMV) -#define F77_dtbmv_base F77_GLOBAL(dtbmv,DTBMV) -#define F77_dtpmv_base F77_GLOBAL(dtpmv,DTPMV) -#define F77_dtrsv_base F77_GLOBAL(dtrsv,DTRSV) -#define F77_dtbsv_base F77_GLOBAL(dtbsv,DTBSV) -#define F77_dtpsv_base F77_GLOBAL(dtpsv,DTPSV) -#define F77_cgemv_base F77_GLOBAL(cgemv,CGEMV) -#define F77_cgbmv_base F77_GLOBAL(cgbmv,CGBMV) -#define F77_ctrmv_base F77_GLOBAL(ctrmv,CTRMV) -#define F77_ctbmv_base F77_GLOBAL(ctbmv,CTBMV) -#define F77_ctpmv_base F77_GLOBAL(ctpmv,CTPMV) -#define F77_ctrsv_base F77_GLOBAL(ctrsv,CTRSV) -#define F77_ctbsv_base F77_GLOBAL(ctbsv,CTBSV) -#define F77_ctpsv_base F77_GLOBAL(ctpsv,CTPSV) -#define F77_zgemv_base F77_GLOBAL(zgemv,ZGEMV) -#define F77_zgbmv_base F77_GLOBAL(zgbmv,ZGBMV) -#define F77_ztrmv_base F77_GLOBAL(ztrmv,ZTRMV) -#define F77_ztbmv_base F77_GLOBAL(ztbmv,ZTBMV) -#define F77_ztpmv_base F77_GLOBAL(ztpmv,ZTPMV) -#define F77_ztrsv_base F77_GLOBAL(ztrsv,ZTRSV) -#define F77_ztbsv_base F77_GLOBAL(ztbsv,ZTBSV) -#define F77_ztpsv_base F77_GLOBAL(ztpsv,ZTPSV) +#define F77_ssymv_base F77_GLOBAL(ssymv,SSYMV) +#define F77_ssbmv_base F77_GLOBAL(ssbmv,SSBMV) +#define F77_sspmv_base F77_GLOBAL(sspmv,SSPMV) +#define F77_sger_base F77_GLOBAL(sger,SGER) +#define F77_ssyr_base F77_GLOBAL(ssyr,SSYR) +#define F77_sspr_base F77_GLOBAL(sspr,SSPR) +#define F77_ssyr2_base F77_GLOBAL(ssyr2,SSYR2) +#define F77_sspr2_base F77_GLOBAL(sspr2,SSPR2) +#define F77_dsymv_base F77_GLOBAL(dsymv,DSYMV) +#define F77_dsbmv_base F77_GLOBAL(dsbmv,DSBMV) +#define F77_dspmv_base F77_GLOBAL(dspmv,DSPMV) +#define F77_dger_base F77_GLOBAL(dger,DGER) +#define F77_dsyr_base F77_GLOBAL(dsyr,DSYR) +#define F77_dspr_base F77_GLOBAL(dspr,DSPR) +#define F77_dsyr2_base F77_GLOBAL(dsyr2,DSYR2) +#define F77_dspr2_base F77_GLOBAL(dspr2,DSPR2) +#define F77_chemv_base F77_GLOBAL(chemv,CHEMV) +#define F77_chbmv_base F77_GLOBAL(chbmv,CHBMV) +#define F77_chpmv_base F77_GLOBAL(chpmv,CHPMV) +#define F77_cgeru_base F77_GLOBAL(cgeru,CGERU) +#define F77_cgerc_base F77_GLOBAL(cgerc,CGERC) +#define F77_cher_base F77_GLOBAL(cher,CHER) +#define F77_chpr_base F77_GLOBAL(chpr,CHPR) +#define F77_cher2_base F77_GLOBAL(cher2,CHER2) +#define F77_chpr2_base F77_GLOBAL(chpr2,CHPR2) +#define F77_zhemv_base F77_GLOBAL(zhemv,ZHEMV) +#define F77_zhbmv_base F77_GLOBAL(zhbmv,ZHBMV) +#define F77_zhpmv_base F77_GLOBAL(zhpmv,ZHPMV) +#define F77_zgeru_base F77_GLOBAL(zgeru,ZGERU) +#define F77_zgerc_base F77_GLOBAL(zgerc,ZGERC) +#define F77_zher_base F77_GLOBAL(zher,ZHER) +#define F77_zhpr_base F77_GLOBAL(zhpr,ZHPR) +#define F77_zher2_base F77_GLOBAL(zher2,ZHER2) +#define F77_zhpr2_base F77_GLOBAL(zhpr2,ZHPR2) +#define F77_sgemv_base F77_GLOBAL(sgemv,SGEMV) +#define F77_sgbmv_base F77_GLOBAL(sgbmv,SGBMV) +#define F77_strmv_base F77_GLOBAL(strmv,STRMV) +#define F77_stbmv_base F77_GLOBAL(stbmv,STBMV) +#define F77_stpmv_base F77_GLOBAL(stpmv,STPMV) +#define F77_strsv_base F77_GLOBAL(strsv,STRSV) +#define F77_stbsv_base F77_GLOBAL(stbsv,STBSV) +#define F77_stpsv_base F77_GLOBAL(stpsv,STPSV) +#define F77_dgemv_base F77_GLOBAL(dgemv,DGEMV) +#define F77_dgbmv_base F77_GLOBAL(dgbmv,DGBMV) +#define F77_dtrmv_base F77_GLOBAL(dtrmv,DTRMV) +#define F77_dtbmv_base F77_GLOBAL(dtbmv,DTBMV) +#define F77_dtpmv_base F77_GLOBAL(dtpmv,DTPMV) +#define F77_dtrsv_base F77_GLOBAL(dtrsv,DTRSV) +#define F77_dtbsv_base F77_GLOBAL(dtbsv,DTBSV) +#define F77_dtpsv_base F77_GLOBAL(dtpsv,DTPSV) +#define F77_cgemv_base F77_GLOBAL(cgemv,CGEMV) +#define F77_cgbmv_base F77_GLOBAL(cgbmv,CGBMV) +#define F77_ctrmv_base F77_GLOBAL(ctrmv,CTRMV) +#define F77_ctbmv_base F77_GLOBAL(ctbmv,CTBMV) +#define F77_ctpmv_base F77_GLOBAL(ctpmv,CTPMV) +#define F77_ctrsv_base F77_GLOBAL(ctrsv,CTRSV) +#define F77_ctbsv_base F77_GLOBAL(ctbsv,CTBSV) +#define F77_ctpsv_base F77_GLOBAL(ctpsv,CTPSV) +#define F77_zgemv_base F77_GLOBAL(zgemv,ZGEMV) +#define F77_zgbmv_base F77_GLOBAL(zgbmv,ZGBMV) +#define F77_ztrmv_base F77_GLOBAL(ztrmv,ZTRMV) +#define F77_ztbmv_base F77_GLOBAL(ztbmv,ZTBMV) +#define F77_ztpmv_base F77_GLOBAL(ztpmv,ZTPMV) +#define F77_ztrsv_base F77_GLOBAL(ztrsv,ZTRSV) +#define F77_ztbsv_base F77_GLOBAL(ztbsv,ZTBSV) +#define F77_ztpsv_base F77_GLOBAL(ztpsv,ZTPSV) /* * Level 3 BLAS */ -#define F77_chemm_base F77_GLOBAL(chemm,CHEMM) -#define F77_cherk_base F77_GLOBAL(cherk,CHERK) -#define F77_cher2k_base F77_GLOBAL(cher2k,CHER2K) -#define F77_zhemm_base F77_GLOBAL(zhemm,ZHEMM) -#define F77_zherk_base F77_GLOBAL(zherk,ZHERK) -#define F77_zher2k_base F77_GLOBAL(zher2k,ZHER2K) -#define F77_sgemm_base F77_GLOBAL(sgemm,SGEMM) -#define F77_ssymm_base F77_GLOBAL(ssymm,SSYMM) -#define F77_ssyrk_base F77_GLOBAL(ssyrk,SSYRK) -#define F77_ssyr2k_base F77_GLOBAL(ssyr2k,SSYR2K) -#define F77_strmm_base F77_GLOBAL(strmm,STRMM) -#define F77_strsm_base F77_GLOBAL(strsm,STRSM) -#define F77_dgemm_base F77_GLOBAL(dgemm,DGEMM) -#define F77_dsymm_base F77_GLOBAL(dsymm,DSYMM) -#define F77_dsyrk_base F77_GLOBAL(dsyrk,DSYRK) -#define F77_dsyr2k_base F77_GLOBAL(dsyr2k,DSYR2K) -#define F77_dtrmm_base F77_GLOBAL(dtrmm,DTRMM) -#define F77_dtrsm_base F77_GLOBAL(dtrsm,DTRSM) -#define F77_cgemm_base F77_GLOBAL(cgemm,CGEMM) -#define F77_csymm_base F77_GLOBAL(csymm,CSYMM) -#define F77_csyrk_base F77_GLOBAL(csyrk,CSYRK) -#define F77_csyr2k_base F77_GLOBAL(csyr2k,CSYR2K) -#define F77_ctrmm_base F77_GLOBAL(ctrmm,CTRMM) -#define F77_ctrsm_base F77_GLOBAL(ctrsm,CTRSM) -#define F77_zgemm_base F77_GLOBAL(zgemm,ZGEMM) -#define F77_zsymm_base F77_GLOBAL(zsymm,ZSYMM) -#define F77_zsyrk_base F77_GLOBAL(zsyrk,ZSYRK) -#define F77_zsyr2k_base F77_GLOBAL(zsyr2k,ZSYR2K) -#define F77_ztrmm_base F77_GLOBAL(ztrmm,ZTRMM) -#define F77_ztrsm_base F77_GLOBAL(ztrsm,ZTRSM) +#define F77_chemm_base F77_GLOBAL(chemm,CHEMM) +#define F77_cherk_base F77_GLOBAL(cherk,CHERK) +#define F77_cher2k_base F77_GLOBAL(cher2k,CHER2K) +#define F77_zhemm_base F77_GLOBAL(zhemm,ZHEMM) +#define F77_zherk_base F77_GLOBAL(zherk,ZHERK) +#define F77_zher2k_base F77_GLOBAL(zher2k,ZHER2K) +#define F77_sgemm_base F77_GLOBAL(sgemm,SGEMM) +#define F77_ssymm_base F77_GLOBAL(ssymm,SSYMM) +#define F77_ssyrk_base F77_GLOBAL(ssyrk,SSYRK) +#define F77_ssyr2k_base F77_GLOBAL(ssyr2k,SSYR2K) +#define F77_strmm_base F77_GLOBAL(strmm,STRMM) +#define F77_strsm_base F77_GLOBAL(strsm,STRSM) +#define F77_dgemm_base F77_GLOBAL(dgemm,DGEMM) +#define F77_dsymm_base F77_GLOBAL(dsymm,DSYMM) +#define F77_dsyrk_base F77_GLOBAL(dsyrk,DSYRK) +#define F77_dsyr2k_base F77_GLOBAL(dsyr2k,DSYR2K) +#define F77_dtrmm_base F77_GLOBAL(dtrmm,DTRMM) +#define F77_dtrsm_base F77_GLOBAL(dtrsm,DTRSM) +#define F77_cgemm_base F77_GLOBAL(cgemm,CGEMM) +#define F77_csymm_base F77_GLOBAL(csymm,CSYMM) +#define F77_csyrk_base F77_GLOBAL(csyrk,CSYRK) +#define F77_csyr2k_base F77_GLOBAL(csyr2k,CSYR2K) +#define F77_ctrmm_base F77_GLOBAL(ctrmm,CTRMM) +#define F77_ctrsm_base F77_GLOBAL(ctrsm,CTRSM) +#define F77_zgemm_base F77_GLOBAL(zgemm,ZGEMM) +#define F77_zsymm_base F77_GLOBAL(zsymm,ZSYMM) +#define F77_zsyrk_base F77_GLOBAL(zsyrk,ZSYRK) +#define F77_zsyr2k_base F77_GLOBAL(zsyr2k,ZSYR2K) +#define F77_ztrmm_base F77_GLOBAL(ztrmm,ZTRMM) +#define F77_ztrsm_base F77_GLOBAL(ztrsm,ZTRSM) /* * Level 1 Fortran variadic definitions @@ -210,81 +210,81 @@ /* Single Precision */ - #define F77_srot(...) F77_srot_base(__VA_ARGS__) - #define F77_srotg(...) F77_srotg_base(__VA_ARGS__) - #define F77_srotm(...) F77_srotm_base(__VA_ARGS__) - #define F77_srotmg(...) F77_srotmg_base(__VA_ARGS__) - #define F77_sswap(...) F77_sswap_base(__VA_ARGS__) - #define F77_scopy(...) F77_scopy_base(__VA_ARGS__) - #define F77_saxpy(...) F77_saxpy_base(__VA_ARGS__) - #define F77_sdot_sub(...) F77_sdot_sub_base(__VA_ARGS__) - #define F77_sdsdot_sub(...) F77_sdsdot_sub_base(__VA_ARGS__) - #define F77_sscal(...) F77_sscal_base(__VA_ARGS__) - #define F77_snrm2_sub(...) F77_snrm2_sub_base(__VA_ARGS__) - #define F77_sasum_sub(...) F77_sasum_sub_base(__VA_ARGS__) - #define F77_isamax_sub(...) F77_isamax_sub_base(__VA_ARGS__) - #define F77_scabs1_sub(...) F77_scabs1_sub_base(__VA_ARGS__) +#define F77_srot(...) F77_srot_base(__VA_ARGS__) +#define F77_srotg(...) F77_srotg_base(__VA_ARGS__) +#define F77_srotm(...) F77_srotm_base(__VA_ARGS__) +#define F77_srotmg(...) F77_srotmg_base(__VA_ARGS__) +#define F77_sswap(...) F77_sswap_base(__VA_ARGS__) +#define F77_scopy(...) F77_scopy_base(__VA_ARGS__) +#define F77_saxpy(...) F77_saxpy_base(__VA_ARGS__) +#define F77_sdot_sub(...) F77_sdot_sub_base(__VA_ARGS__) +#define F77_sdsdot_sub(...) F77_sdsdot_sub_base(__VA_ARGS__) +#define F77_sscal(...) F77_sscal_base(__VA_ARGS__) +#define F77_snrm2_sub(...) F77_snrm2_sub_base(__VA_ARGS__) +#define F77_sasum_sub(...) F77_sasum_sub_base(__VA_ARGS__) +#define F77_isamax_sub(...) F77_isamax_sub_base(__VA_ARGS__) +#define F77_scabs1_sub(...) F77_scabs1_sub_base(__VA_ARGS__) /* Double Precision */ - #define F77_drot(...) F77_drot_base(__VA_ARGS__) - #define F77_drotg(...) F77_drotg_base(__VA_ARGS__) - #define F77_drotm(...) F77_drotm_base(__VA_ARGS__) - #define F77_drotmg(...) F77_drotmg_base(__VA_ARGS__) - #define F77_dswap(...) F77_dswap_base(__VA_ARGS__) - #define F77_dcopy(...) F77_dcopy_base(__VA_ARGS__) - #define F77_daxpy(...) F77_daxpy_base(__VA_ARGS__) - #define F77_dswap(...) F77_dswap_base(__VA_ARGS__) - #define F77_dsdot_sub(...) F77_dsdot_sub_base(__VA_ARGS__) - #define F77_ddot_sub(...) F77_ddot_sub_base(__VA_ARGS__) - #define F77_dscal(...) F77_dscal_base(__VA_ARGS__) - #define F77_dnrm2_sub(...) F77_dnrm2_sub_base(__VA_ARGS__) - #define F77_dasum_sub(...) F77_dasum_sub_base(__VA_ARGS__) - #define F77_idamax_sub(...) F77_idamax_sub_base(__VA_ARGS__) - #define F77_dcabs1_sub(...) F77_dcabs1_sub_base(__VA_ARGS__) +#define F77_drot(...) F77_drot_base(__VA_ARGS__) +#define F77_drotg(...) F77_drotg_base(__VA_ARGS__) +#define F77_drotm(...) F77_drotm_base(__VA_ARGS__) +#define F77_drotmg(...) F77_drotmg_base(__VA_ARGS__) +#define F77_dswap(...) F77_dswap_base(__VA_ARGS__) +#define F77_dcopy(...) F77_dcopy_base(__VA_ARGS__) +#define F77_daxpy(...) F77_daxpy_base(__VA_ARGS__) +#define F77_dswap(...) F77_dswap_base(__VA_ARGS__) +#define F77_dsdot_sub(...) F77_dsdot_sub_base(__VA_ARGS__) +#define F77_ddot_sub(...) F77_ddot_sub_base(__VA_ARGS__) +#define F77_dscal(...) F77_dscal_base(__VA_ARGS__) +#define F77_dnrm2_sub(...) F77_dnrm2_sub_base(__VA_ARGS__) +#define F77_dasum_sub(...) F77_dasum_sub_base(__VA_ARGS__) +#define F77_idamax_sub(...) F77_idamax_sub_base(__VA_ARGS__) +#define F77_dcabs1_sub(...) F77_dcabs1_sub_base(__VA_ARGS__) /* Single Complex Precision */ - #define F77_crotg(...) F77_crotg_base(__VA_ARGS__) - #define F77_csrot(...) F77_csrot_base(__VA_ARGS__) - #define F77_cswap(...) F77_cswap_base(__VA_ARGS__) - #define F77_ccopy(...) F77_ccopy_base(__VA_ARGS__) - #define F77_caxpy(...) F77_caxpy_base(__VA_ARGS__) - #define F77_cswap(...) F77_cswap_base(__VA_ARGS__) - #define F77_cdotc_sub(...) F77_cdotc_sub_base(__VA_ARGS__) - #define F77_cdotu_sub(...) F77_cdotu_sub_base(__VA_ARGS__) - #define F77_cscal(...) F77_cscal_base(__VA_ARGS__) - #define F77_icamax_sub(...) F77_icamax_sub_base(__VA_ARGS__) - #define F77_csscal(...) F77_csscal_base(__VA_ARGS__) - #define F77_scnrm2_sub(...) F77_scnrm2_sub_base(__VA_ARGS__) - #define F77_scasum_sub(...) F77_scasum_sub_base(__VA_ARGS__) +#define F77_crotg(...) F77_crotg_base(__VA_ARGS__) +#define F77_csrot(...) F77_csrot_base(__VA_ARGS__) +#define F77_cswap(...) F77_cswap_base(__VA_ARGS__) +#define F77_ccopy(...) F77_ccopy_base(__VA_ARGS__) +#define F77_caxpy(...) F77_caxpy_base(__VA_ARGS__) +#define F77_cswap(...) F77_cswap_base(__VA_ARGS__) +#define F77_cdotc_sub(...) F77_cdotc_sub_base(__VA_ARGS__) +#define F77_cdotu_sub(...) F77_cdotu_sub_base(__VA_ARGS__) +#define F77_cscal(...) F77_cscal_base(__VA_ARGS__) +#define F77_icamax_sub(...) F77_icamax_sub_base(__VA_ARGS__) +#define F77_csscal(...) F77_csscal_base(__VA_ARGS__) +#define F77_scnrm2_sub(...) F77_scnrm2_sub_base(__VA_ARGS__) +#define F77_scasum_sub(...) F77_scasum_sub_base(__VA_ARGS__) /* Double Complex Precision */ - #define F77_zrotg(...) F77_zrotg_base(__VA_ARGS__) - #define F77_zdrot(...) F77_zdrot_base(__VA_ARGS__) - #define F77_zswap(...) F77_zswap_base(__VA_ARGS__) - #define F77_zcopy(...) F77_zcopy_base(__VA_ARGS__) - #define F77_zaxpy(...) F77_zaxpy_base(__VA_ARGS__) - #define F77_zswap(...) F77_zswap_base(__VA_ARGS__) - #define F77_zdotc_sub(...) F77_zdotc_sub_base(__VA_ARGS__) - #define F77_zdotu_sub(...) F77_zdotu_sub_base(__VA_ARGS__) - #define F77_zdscal(...) F77_zdscal_base(__VA_ARGS__) - #define F77_zscal(...) F77_zscal_base(__VA_ARGS__) - #define F77_dznrm2_sub(...) F77_dznrm2_sub_base(__VA_ARGS__) - #define F77_dzasum_sub(...) F77_dzasum_sub_base(__VA_ARGS__) - #define F77_izamax_sub(...) F77_izamax_sub_base(__VA_ARGS__) +#define F77_zrotg(...) F77_zrotg_base(__VA_ARGS__) +#define F77_zdrot(...) F77_zdrot_base(__VA_ARGS__) +#define F77_zswap(...) F77_zswap_base(__VA_ARGS__) +#define F77_zcopy(...) F77_zcopy_base(__VA_ARGS__) +#define F77_zaxpy(...) F77_zaxpy_base(__VA_ARGS__) +#define F77_zswap(...) F77_zswap_base(__VA_ARGS__) +#define F77_zdotc_sub(...) F77_zdotc_sub_base(__VA_ARGS__) +#define F77_zdotu_sub(...) F77_zdotu_sub_base(__VA_ARGS__) +#define F77_zdscal(...) F77_zdscal_base(__VA_ARGS__) +#define F77_zscal(...) F77_zscal_base(__VA_ARGS__) +#define F77_dznrm2_sub(...) F77_dznrm2_sub_base(__VA_ARGS__) +#define F77_dzasum_sub(...) F77_dzasum_sub_base(__VA_ARGS__) +#define F77_izamax_sub(...) F77_izamax_sub_base(__VA_ARGS__) /* * Level 2 Fortran variadic definitions without FCHAR */ - #define F77_sger(...) F77_sger_base(__VA_ARGS__) - #define F77_dger(...) F77_dger_base(__VA_ARGS__) - #define F77_cgerc(...) F77_cgerc_base(__VA_ARGS__) - #define F77_cgeru(...) F77_cgeru_base(__VA_ARGS__) - #define F77_zgerc(...) F77_zgerc_base(__VA_ARGS__) - #define F77_zgeru(...) F77_zgeru_base(__VA_ARGS__) +#define F77_sger(...) F77_sger_base(__VA_ARGS__) +#define F77_dger(...) F77_dger_base(__VA_ARGS__) +#define F77_cgerc(...) F77_cgerc_base(__VA_ARGS__) +#define F77_cgeru(...) F77_cgeru_base(__VA_ARGS__) +#define F77_zgerc(...) F77_zgerc_base(__VA_ARGS__) +#define F77_zgeru(...) F77_zgeru_base(__VA_ARGS__) #ifdef BLAS_FORTRAN_STRLEN_END @@ -294,75 +294,75 @@ /* Single Precision */ - #define F77_sgemv(...) F77_sgemv_base(__VA_ARGS__, 1) - #define F77_sgbmv(...) F77_sgbmv_base(__VA_ARGS__, 1) - #define F77_ssymv(...) F77_ssymv_base(__VA_ARGS__, 1) - #define F77_ssbmv(...) F77_ssbmv_base(__VA_ARGS__, 1) - #define F77_sspmv(...) F77_sspmv_base(__VA_ARGS__, 1) - #define F77_strmv(...) F77_strmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_stbmv(...) F77_stbmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_strsv(...) F77_strsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_stbsv(...) F77_stbsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_stpmv(...) F77_stpmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_stpsv(...) F77_stpsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ssyr(...) F77_ssyr_base(__VA_ARGS__, 1) - #define F77_sspr(...) F77_sspr_base(__VA_ARGS__, 1) - #define F77_sspr2(...) F77_sspr2_base(__VA_ARGS__, 1) - #define F77_ssyr2(...) F77_ssyr2_base(__VA_ARGS__, 1) + #define F77_sgemv(...) F77_sgemv_base(__VA_ARGS__, 1) + #define F77_sgbmv(...) F77_sgbmv_base(__VA_ARGS__, 1) + #define F77_ssymv(...) F77_ssymv_base(__VA_ARGS__, 1) + #define F77_ssbmv(...) F77_ssbmv_base(__VA_ARGS__, 1) + #define F77_sspmv(...) F77_sspmv_base(__VA_ARGS__, 1) + #define F77_strmv(...) F77_strmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_stbmv(...) F77_stbmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_strsv(...) F77_strsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_stbsv(...) F77_stbsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_stpmv(...) F77_stpmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_stpsv(...) F77_stpsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ssyr(...) F77_ssyr_base(__VA_ARGS__, 1) + #define F77_sspr(...) F77_sspr_base(__VA_ARGS__, 1) + #define F77_sspr2(...) F77_sspr2_base(__VA_ARGS__, 1) + #define F77_ssyr2(...) F77_ssyr2_base(__VA_ARGS__, 1) /* Double Precision */ - #define F77_dgemv(...) F77_dgemv_base(__VA_ARGS__, 1) - #define F77_dgbmv(...) F77_dgbmv_base(__VA_ARGS__, 1) - #define F77_dsymv(...) F77_dsymv_base(__VA_ARGS__, 1) - #define F77_dsbmv(...) F77_dsbmv_base(__VA_ARGS__, 1) - #define F77_dspmv(...) F77_dspmv_base(__VA_ARGS__, 1) - #define F77_dtrmv(...) F77_dtrmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtbmv(...) F77_dtbmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtrsv(...) F77_dtrsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtbsv(...) F77_dtbsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtpmv(...) F77_dtpmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtpsv(...) F77_dtpsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dsyr(...) F77_dsyr_base(__VA_ARGS__, 1) - #define F77_dspr(...) F77_dspr_base(__VA_ARGS__, 1) - #define F77_dspr2(...) F77_dspr2_base(__VA_ARGS__, 1) - #define F77_dsyr2(...) F77_dsyr2_base(__VA_ARGS__, 1) + #define F77_dgemv(...) F77_dgemv_base(__VA_ARGS__, 1) + #define F77_dgbmv(...) F77_dgbmv_base(__VA_ARGS__, 1) + #define F77_dsymv(...) F77_dsymv_base(__VA_ARGS__, 1) + #define F77_dsbmv(...) F77_dsbmv_base(__VA_ARGS__, 1) + #define F77_dspmv(...) F77_dspmv_base(__VA_ARGS__, 1) + #define F77_dtrmv(...) F77_dtrmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtbmv(...) F77_dtbmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtrsv(...) F77_dtrsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtbsv(...) F77_dtbsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtpmv(...) F77_dtpmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtpsv(...) F77_dtpsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dsyr(...) F77_dsyr_base(__VA_ARGS__, 1) + #define F77_dspr(...) F77_dspr_base(__VA_ARGS__, 1) + #define F77_dspr2(...) F77_dspr2_base(__VA_ARGS__, 1) + #define F77_dsyr2(...) F77_dsyr2_base(__VA_ARGS__, 1) /* Single Complex Precision */ - #define F77_cgemv(...) F77_cgemv_base(__VA_ARGS__, 1) - #define F77_cgbmv(...) F77_cgbmv_base(__VA_ARGS__, 1) - #define F77_chemv(...) F77_chemv_base(__VA_ARGS__, 1) - #define F77_chbmv(...) F77_chbmv_base(__VA_ARGS__, 1) - #define F77_chpmv(...) F77_chpmv_base(__VA_ARGS__, 1) - #define F77_ctrmv(...) F77_ctrmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctbmv(...) F77_ctbmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctpmv(...) F77_ctpmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctrsv(...) F77_ctrsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctbsv(...) F77_ctbsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctpsv(...) F77_ctpsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_cher(...) F77_cher_base(__VA_ARGS__, 1) - #define F77_cher2(...) F77_cher2_base(__VA_ARGS__, 1) - #define F77_chpr(...) F77_chpr_base(__VA_ARGS__, 1) - #define F77_chpr2(...) F77_chpr2_base(__VA_ARGS__, 1) + #define F77_cgemv(...) F77_cgemv_base(__VA_ARGS__, 1) + #define F77_cgbmv(...) F77_cgbmv_base(__VA_ARGS__, 1) + #define F77_chemv(...) F77_chemv_base(__VA_ARGS__, 1) + #define F77_chbmv(...) F77_chbmv_base(__VA_ARGS__, 1) + #define F77_chpmv(...) F77_chpmv_base(__VA_ARGS__, 1) + #define F77_ctrmv(...) F77_ctrmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctbmv(...) F77_ctbmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctpmv(...) F77_ctpmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctrsv(...) F77_ctrsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctbsv(...) F77_ctbsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctpsv(...) F77_ctpsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_cher(...) F77_cher_base(__VA_ARGS__, 1) + #define F77_cher2(...) F77_cher2_base(__VA_ARGS__, 1) + #define F77_chpr(...) F77_chpr_base(__VA_ARGS__, 1) + #define F77_chpr2(...) F77_chpr2_base(__VA_ARGS__, 1) /* Double Complex Precision */ - #define F77_zgemv(...) F77_zgemv_base(__VA_ARGS__, 1) - #define F77_zgbmv(...) F77_zgbmv_base(__VA_ARGS__, 1) - #define F77_zhemv(...) F77_zhemv_base(__VA_ARGS__, 1) - #define F77_zhbmv(...) F77_zhbmv_base(__VA_ARGS__, 1) - #define F77_zhpmv(...) F77_zhpmv_base(__VA_ARGS__, 1) - #define F77_ztrmv(...) F77_ztrmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztbmv(...) F77_ztbmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztpmv(...) F77_ztpmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztrsv(...) F77_ztrsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztbsv(...) F77_ztbsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztpsv(...) F77_ztpsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_zher(...) F77_zher_base(__VA_ARGS__, 1) - #define F77_zher2(...) F77_zher2_base(__VA_ARGS__, 1) - #define F77_zhpr(...) F77_zhpr_base(__VA_ARGS__, 1) - #define F77_zhpr2(...) F77_zhpr2_base(__VA_ARGS__, 1) + #define F77_zgemv(...) F77_zgemv_base(__VA_ARGS__, 1) + #define F77_zgbmv(...) F77_zgbmv_base(__VA_ARGS__, 1) + #define F77_zhemv(...) F77_zhemv_base(__VA_ARGS__, 1) + #define F77_zhbmv(...) F77_zhbmv_base(__VA_ARGS__, 1) + #define F77_zhpmv(...) F77_zhpmv_base(__VA_ARGS__, 1) + #define F77_ztrmv(...) F77_ztrmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztbmv(...) F77_ztbmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztpmv(...) F77_ztpmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztrsv(...) F77_ztrsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztbsv(...) F77_ztbsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztpsv(...) F77_ztpsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_zher(...) F77_zher_base(__VA_ARGS__, 1) + #define F77_zher2(...) F77_zher2_base(__VA_ARGS__, 1) + #define F77_zhpr(...) F77_zhpr_base(__VA_ARGS__, 1) + #define F77_zhpr2(...) F77_zhpr2_base(__VA_ARGS__, 1) /* * Level 3 Fortran variadic definitions with BLAS_FORTRAN_STRLEN_END @@ -370,123 +370,123 @@ /* Single Precision */ - #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__, 1, 1) - #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__, 1, 1) - #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__, 1, 1) - #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__, 1, 1) - #define F77_strmm(...) F77_strmm_base(__VA_ARGS__, 1, 1, 1, 1) - #define F77_strsm(...) F77_strsm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__, 1, 1) + #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__, 1, 1) + #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__, 1, 1) + #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__, 1, 1) + #define F77_strmm(...) F77_strmm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_strsm(...) F77_strsm_base(__VA_ARGS__, 1, 1, 1, 1) /* Double Precision */ - #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__, 1, 1) - #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__, 1, 1) - #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__, 1, 1) - #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__, 1, 1) - #define F77_dtrmm(...) F77_dtrmm_base(__VA_ARGS__, 1, 1, 1, 1) - #define F77_dtrsm(...) F77_dtrsm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__, 1, 1) + #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__, 1, 1) + #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__, 1, 1) + #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__, 1, 1) + #define F77_dtrmm(...) F77_dtrmm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_dtrsm(...) F77_dtrsm_base(__VA_ARGS__, 1, 1, 1, 1) /* Single Complex Precision */ - #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__, 1, 1) - #define F77_csymm(...) F77_csymm_base(__VA_ARGS__, 1, 1) - #define F77_chemm(...) F77_chemm_base(__VA_ARGS__, 1, 1) - #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__, 1, 1) - #define F77_cherk(...) F77_cherk_base(__VA_ARGS__, 1, 1) - #define F77_csyr2k(...) F77_csyr2k_base(__VA_ARGS__, 1, 1) - #define F77_cher2k(...) F77_cher2k_base(__VA_ARGS__, 1, 1) - #define F77_ctrmm(...) F77_ctrmm_base(__VA_ARGS__, 1, 1, 1, 1) - #define F77_ctrsm(...) F77_ctrsm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__, 1, 1) + #define F77_csymm(...) F77_csymm_base(__VA_ARGS__, 1, 1) + #define F77_chemm(...) F77_chemm_base(__VA_ARGS__, 1, 1) + #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__, 1, 1) + #define F77_cherk(...) F77_cherk_base(__VA_ARGS__, 1, 1) + #define F77_csyr2k(...) F77_csyr2k_base(__VA_ARGS__, 1, 1) + #define F77_cher2k(...) F77_cher2k_base(__VA_ARGS__, 1, 1) + #define F77_ctrmm(...) F77_ctrmm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_ctrsm(...) F77_ctrsm_base(__VA_ARGS__, 1, 1, 1, 1) /* Double Complex Precision */ - #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__, 1, 1) - #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__, 1, 1) - #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__, 1, 1) - #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__, 1, 1) - #define F77_zherk(...) F77_zherk_base(__VA_ARGS__, 1, 1) - #define F77_zsyr2k(...) F77_zsyr2k_base(__VA_ARGS__, 1, 1) - #define F77_zher2k(...) F77_zher2k_base(__VA_ARGS__, 1, 1) - #define F77_ztrmm(...) F77_ztrmm_base(__VA_ARGS__, 1, 1, 1, 1) - #define F77_ztrsm(...) F77_ztrsm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__, 1, 1) + #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__, 1, 1) + #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__, 1, 1) + #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__, 1, 1) + #define F77_zherk(...) F77_zherk_base(__VA_ARGS__, 1, 1) + #define F77_zsyr2k(...) F77_zsyr2k_base(__VA_ARGS__, 1, 1) + #define F77_zher2k(...) F77_zher2k_base(__VA_ARGS__, 1, 1) + #define F77_ztrmm(...) F77_ztrmm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_ztrsm(...) F77_ztrsm_base(__VA_ARGS__, 1, 1, 1, 1) #else - + /* * Level 2 Fortran variadic definitions without BLAS_FORTRAN_STRLEN_END */ /* Single Precision */ - #define F77_sgemv(...) F77_sgemv_base(__VA_ARGS__) - #define F77_sgbmv(...) F77_sgbmv_base(__VA_ARGS__) - #define F77_ssymv(...) F77_ssymv_base(__VA_ARGS__) - #define F77_ssbmv(...) F77_ssbmv_base(__VA_ARGS__) - #define F77_sspmv(...) F77_sspmv_base(__VA_ARGS__) - #define F77_strmv(...) F77_strmv_base(__VA_ARGS__) - #define F77_stbmv(...) F77_stbmv_base(__VA_ARGS__) - #define F77_strsv(...) F77_strsv_base(__VA_ARGS__) - #define F77_stbsv(...) F77_stbsv_base(__VA_ARGS__) - #define F77_stpmv(...) F77_stpmv_base(__VA_ARGS__) - #define F77_stpsv(...) F77_stpsv_base(__VA_ARGS__) - #define F77_ssyr(...) F77_ssyr_base(__VA_ARGS__) - #define F77_sspr(...) F77_sspr_base(__VA_ARGS__) - #define F77_sspr2(...) F77_sspr2_base(__VA_ARGS__) - #define F77_ssyr2(...) F77_ssyr2_base(__VA_ARGS__) + #define F77_sgemv(...) F77_sgemv_base(__VA_ARGS__) + #define F77_sgbmv(...) F77_sgbmv_base(__VA_ARGS__) + #define F77_ssymv(...) F77_ssymv_base(__VA_ARGS__) + #define F77_ssbmv(...) F77_ssbmv_base(__VA_ARGS__) + #define F77_sspmv(...) F77_sspmv_base(__VA_ARGS__) + #define F77_strmv(...) F77_strmv_base(__VA_ARGS__) + #define F77_stbmv(...) F77_stbmv_base(__VA_ARGS__) + #define F77_strsv(...) F77_strsv_base(__VA_ARGS__) + #define F77_stbsv(...) F77_stbsv_base(__VA_ARGS__) + #define F77_stpmv(...) F77_stpmv_base(__VA_ARGS__) + #define F77_stpsv(...) F77_stpsv_base(__VA_ARGS__) + #define F77_ssyr(...) F77_ssyr_base(__VA_ARGS__) + #define F77_sspr(...) F77_sspr_base(__VA_ARGS__) + #define F77_sspr2(...) F77_sspr2_base(__VA_ARGS__) + #define F77_ssyr2(...) F77_ssyr2_base(__VA_ARGS__) /* Double Precision */ - #define F77_dgemv(...) F77_dgemv_base(__VA_ARGS__) - #define F77_dgbmv(...) F77_dgbmv_base(__VA_ARGS__) - #define F77_dsymv(...) F77_dsymv_base(__VA_ARGS__) - #define F77_dsbmv(...) F77_dsbmv_base(__VA_ARGS__) - #define F77_dspmv(...) F77_dspmv_base(__VA_ARGS__) - #define F77_dtrmv(...) F77_dtrmv_base(__VA_ARGS__) - #define F77_dtbmv(...) F77_dtbmv_base(__VA_ARGS__) - #define F77_dtrsv(...) F77_dtrsv_base(__VA_ARGS__) - #define F77_dtbsv(...) F77_dtbsv_base(__VA_ARGS__) - #define F77_dtpmv(...) F77_dtpmv_base(__VA_ARGS__) - #define F77_dtpsv(...) F77_dtpsv_base(__VA_ARGS__) - #define F77_dsyr(...) F77_dsyr_base(__VA_ARGS__) - #define F77_dspr(...) F77_dspr_base(__VA_ARGS__) - #define F77_dspr2(...) F77_dspr2_base(__VA_ARGS__) - #define F77_dsyr2(...) F77_dsyr2_base(__VA_ARGS__) + #define F77_dgemv(...) F77_dgemv_base(__VA_ARGS__) + #define F77_dgbmv(...) F77_dgbmv_base(__VA_ARGS__) + #define F77_dsymv(...) F77_dsymv_base(__VA_ARGS__) + #define F77_dsbmv(...) F77_dsbmv_base(__VA_ARGS__) + #define F77_dspmv(...) F77_dspmv_base(__VA_ARGS__) + #define F77_dtrmv(...) F77_dtrmv_base(__VA_ARGS__) + #define F77_dtbmv(...) F77_dtbmv_base(__VA_ARGS__) + #define F77_dtrsv(...) F77_dtrsv_base(__VA_ARGS__) + #define F77_dtbsv(...) F77_dtbsv_base(__VA_ARGS__) + #define F77_dtpmv(...) F77_dtpmv_base(__VA_ARGS__) + #define F77_dtpsv(...) F77_dtpsv_base(__VA_ARGS__) + #define F77_dsyr(...) F77_dsyr_base(__VA_ARGS__) + #define F77_dspr(...) F77_dspr_base(__VA_ARGS__) + #define F77_dspr2(...) F77_dspr2_base(__VA_ARGS__) + #define F77_dsyr2(...) F77_dsyr2_base(__VA_ARGS__) /* Single Complex Precision */ - #define F77_cgemv(...) F77_cgemv_base(__VA_ARGS__) - #define F77_cgbmv(...) F77_cgbmv_base(__VA_ARGS__) - #define F77_chemv(...) F77_chemv_base(__VA_ARGS__) - #define F77_chbmv(...) F77_chbmv_base(__VA_ARGS__) - #define F77_chpmv(...) F77_chpmv_base(__VA_ARGS__) - #define F77_ctrmv(...) F77_ctrmv_base(__VA_ARGS__) - #define F77_ctbmv(...) F77_ctbmv_base(__VA_ARGS__) - #define F77_ctpmv(...) F77_ctpmv_base(__VA_ARGS__) - #define F77_ctrsv(...) F77_ctrsv_base(__VA_ARGS__) - #define F77_ctbsv(...) F77_ctbsv_base(__VA_ARGS__) - #define F77_ctpsv(...) F77_ctpsv_base(__VA_ARGS__) - #define F77_cher(...) F77_cher_base(__VA_ARGS__) - #define F77_cher2(...) F77_cher2_base(__VA_ARGS__) - #define F77_chpr(...) F77_chpr_base(__VA_ARGS__) - #define F77_chpr2(...) F77_chpr2_base(__VA_ARGS__) + #define F77_cgemv(...) F77_cgemv_base(__VA_ARGS__) + #define F77_cgbmv(...) F77_cgbmv_base(__VA_ARGS__) + #define F77_chemv(...) F77_chemv_base(__VA_ARGS__) + #define F77_chbmv(...) F77_chbmv_base(__VA_ARGS__) + #define F77_chpmv(...) F77_chpmv_base(__VA_ARGS__) + #define F77_ctrmv(...) F77_ctrmv_base(__VA_ARGS__) + #define F77_ctbmv(...) F77_ctbmv_base(__VA_ARGS__) + #define F77_ctpmv(...) F77_ctpmv_base(__VA_ARGS__) + #define F77_ctrsv(...) F77_ctrsv_base(__VA_ARGS__) + #define F77_ctbsv(...) F77_ctbsv_base(__VA_ARGS__) + #define F77_ctpsv(...) F77_ctpsv_base(__VA_ARGS__) + #define F77_cher(...) F77_cher_base(__VA_ARGS__) + #define F77_cher2(...) F77_cher2_base(__VA_ARGS__) + #define F77_chpr(...) F77_chpr_base(__VA_ARGS__) + #define F77_chpr2(...) F77_chpr2_base(__VA_ARGS__) /* Double Complex Precision */ - #define F77_zgemv(...) F77_zgemv_base(__VA_ARGS__) - #define F77_zgbmv(...) F77_zgbmv_base(__VA_ARGS__) - #define F77_zhemv(...) F77_zhemv_base(__VA_ARGS__) - #define F77_zhbmv(...) F77_zhbmv_base(__VA_ARGS__) - #define F77_zhpmv(...) F77_zhpmv_base(__VA_ARGS__) - #define F77_ztrmv(...) F77_ztrmv_base(__VA_ARGS__) - #define F77_ztbmv(...) F77_ztbmv_base(__VA_ARGS__) - #define F77_ztpmv(...) F77_ztpmv_base(__VA_ARGS__) - #define F77_ztrsv(...) F77_ztrsv_base(__VA_ARGS__) - #define F77_ztbsv(...) F77_ztbsv_base(__VA_ARGS__) - #define F77_ztpsv(...) F77_ztpsv_base(__VA_ARGS__) - #define F77_zher(...) F77_zher_base(__VA_ARGS__) - #define F77_zher2(...) F77_zher2_base(__VA_ARGS__) - #define F77_zhpr(...) F77_zhpr_base(__VA_ARGS__) - #define F77_zhpr2(...) F77_zhpr2_base(__VA_ARGS__) + #define F77_zgemv(...) F77_zgemv_base(__VA_ARGS__) + #define F77_zgbmv(...) F77_zgbmv_base(__VA_ARGS__) + #define F77_zhemv(...) F77_zhemv_base(__VA_ARGS__) + #define F77_zhbmv(...) F77_zhbmv_base(__VA_ARGS__) + #define F77_zhpmv(...) F77_zhpmv_base(__VA_ARGS__) + #define F77_ztrmv(...) F77_ztrmv_base(__VA_ARGS__) + #define F77_ztbmv(...) F77_ztbmv_base(__VA_ARGS__) + #define F77_ztpmv(...) F77_ztpmv_base(__VA_ARGS__) + #define F77_ztrsv(...) F77_ztrsv_base(__VA_ARGS__) + #define F77_ztbsv(...) F77_ztbsv_base(__VA_ARGS__) + #define F77_ztpsv(...) F77_ztpsv_base(__VA_ARGS__) + #define F77_zher(...) F77_zher_base(__VA_ARGS__) + #define F77_zher2(...) F77_zher2_base(__VA_ARGS__) + #define F77_zhpr(...) F77_zhpr_base(__VA_ARGS__) + #define F77_zhpr2(...) F77_zhpr2_base(__VA_ARGS__) /* * Level 3 Fortran variadic definitions without BLAS_FORTRAN_STRLEN_END @@ -494,45 +494,45 @@ /* Single Precision */ - #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__) - #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__) - #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__) - #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__) - #define F77_strmm(...) F77_strmm_base(__VA_ARGS__) - #define F77_strsm(...) F77_strsm_base(__VA_ARGS__) + #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__) + #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__) + #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__) + #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__) + #define F77_strmm(...) F77_strmm_base(__VA_ARGS__) + #define F77_strsm(...) F77_strsm_base(__VA_ARGS__) /* Double Precision */ - #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__) - #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__) - #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__) - #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__) - #define F77_dtrmm(...) F77_dtrmm_base(__VA_ARGS__) - #define F77_dtrsm(...) F77_dtrsm_base(__VA_ARGS__) + #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__) + #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__) + #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__) + #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__) + #define F77_dtrmm(...) F77_dtrmm_base(__VA_ARGS__) + #define F77_dtrsm(...) F77_dtrsm_base(__VA_ARGS__) /* Single Complex Precision */ - #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__) - #define F77_csymm(...) F77_csymm_base(__VA_ARGS__) - #define F77_chemm(...) F77_chemm_base(__VA_ARGS__) - #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__) - #define F77_cherk(...) F77_cherk_base(__VA_ARGS__) - #define F77_csyr2k(...) F77_csyr2k_base(__VA_ARGS__) - #define F77_cher2k(...) F77_cher2k_base(__VA_ARGS__) - #define F77_ctrmm(...) F77_ctrmm_base(__VA_ARGS__) - #define F77_ctrsm(...) F77_ctrsm_base(__VA_ARGS__) + #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__) + #define F77_csymm(...) F77_csymm_base(__VA_ARGS__) + #define F77_chemm(...) F77_chemm_base(__VA_ARGS__) + #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__) + #define F77_cherk(...) F77_cherk_base(__VA_ARGS__) + #define F77_csyr2k(...) F77_csyr2k_base(__VA_ARGS__) + #define F77_cher2k(...) F77_cher2k_base(__VA_ARGS__) + #define F77_ctrmm(...) F77_ctrmm_base(__VA_ARGS__) + #define F77_ctrsm(...) F77_ctrsm_base(__VA_ARGS__) /* Double Complex Precision */ - #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__) - #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__) - #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__) - #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__) - #define F77_zherk(...) F77_zherk_base(__VA_ARGS__) - #define F77_zsyr2k(...) F77_zsyr2k_base(__VA_ARGS__) - #define F77_zher2k(...) F77_zher2k_base(__VA_ARGS__) - #define F77_ztrmm(...) F77_ztrmm_base(__VA_ARGS__) - #define F77_ztrsm(...) F77_ztrsm_base(__VA_ARGS__) + #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__) + #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__) + #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__) + #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__) + #define F77_zherk(...) F77_zherk_base(__VA_ARGS__) + #define F77_zsyr2k(...) F77_zsyr2k_base(__VA_ARGS__) + #define F77_zher2k(...) F77_zher2k_base(__VA_ARGS__) + #define F77_ztrmm(...) F77_ztrmm_base(__VA_ARGS__) + #define F77_ztrsm(...) F77_ztrsm_base(__VA_ARGS__) #endif @@ -545,85 +545,90 @@ extern "C" { #endif #ifdef BLAS_FORTRAN_STRLEN_END - #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__, 1) + #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__, 1) #else - #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__) + #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__) +#endif +void +#ifdef HAS_ATTRIBUTE_WEAK_SUPPORT +__attribute__((weak)) +#endif +F77_xerbla_base(FCHAR, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t #endif -void F77_xerbla_base(FCHAR, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +); + /* * Level 1 Fortran Prototypes */ /* Single Precision */ - void F77_srot_base(FINT, float *, FINT, float *, FINT, const float *, const float *); - void F77_srotg_base(float *,float *,float *,float *); - void F77_srotm_base( FINT, float *, FINT, float *, FINT, const float *); - void F77_srotmg_base(float *,float *,float *,const float *, float *); - void F77_sswap_base( FINT, float *, FINT, float *, FINT); - void F77_scopy_base( FINT, const float *, FINT, float *, FINT); - void F77_saxpy_base( FINT, const float *, const float *, FINT, float *, FINT); - void F77_sdot_sub_base(FINT, const float *, FINT, const float *, FINT, float *); - void F77_sdsdot_sub_base( FINT, const float *, const float *, FINT, const float *, FINT, float *); - void F77_sscal_base( FINT, const float *, float *, FINT); - void F77_snrm2_sub_base( FINT, const float *, FINT, float *); - void F77_sasum_sub_base( FINT, const float *, FINT, float *); - void F77_isamax_sub_base( FINT, const float * , FINT, FINT2); +void F77_srot_base(FINT, float *, FINT, float *, FINT, const float *, const float *); +void F77_srotg_base(float *,float *,float *,float *); +void F77_srotm_base(FINT, float *, FINT, float *, FINT, const float *); +void F77_srotmg_base(float *,float *,float *,const float *, float *); +void F77_sswap_base(FINT, float *, FINT, float *, FINT); +void F77_scopy_base(FINT, const float *, FINT, float *, FINT); +void F77_saxpy_base(FINT, const float *, const float *, FINT, float *, FINT); +void F77_sdot_sub_base(FINT, const float *, FINT, const float *, FINT, float *); +void F77_sdsdot_sub_base(FINT, const float *, const float *, FINT, const float *, FINT, float *); +void F77_sscal_base(FINT, const float *, float *, FINT); +void F77_snrm2_sub_base(FINT, const float *, FINT, float *); +void F77_sasum_sub_base(FINT, const float *, FINT, float *); +void F77_isamax_sub_base(FINT, const float * , FINT, FINT2); /* Double Precision */ - void F77_drot_base(FINT, double *, FINT, double *, FINT, const double *, const double *); - void F77_drotg_base(double *,double *,double *,double *); - void F77_drotm_base( FINT, double *, FINT, double *, FINT, const double *); - void F77_drotmg_base(double *,double *,double *,const double *, double *); - void F77_dswap_base( FINT, double *, FINT, double *, FINT); - void F77_dcopy_base( FINT, const double *, FINT, double *, FINT); - void F77_daxpy_base( FINT, const double *, const double *, FINT, double *, FINT); - void F77_dswap_base( FINT, double *, FINT, double *, FINT); - void F77_dsdot_sub_base(FINT, const float *, FINT, const float *, FINT, double *); - void F77_ddot_sub_base( FINT, const double *, FINT, const double *, FINT, double *); - void F77_dscal_base( FINT, const double *, double *, FINT); - void F77_dnrm2_sub_base( FINT, const double *, FINT, double *); - void F77_dasum_sub_base( FINT, const double *, FINT, double *); - void F77_idamax_sub_base( FINT, const double * , FINT, FINT2); +void F77_drot_base(FINT, double *, FINT, double *, FINT, const double *, const double *); +void F77_drotg_base(double *,double *,double *,double *); +void F77_drotm_base(FINT, double *, FINT, double *, FINT, const double *); +void F77_drotmg_base(double *,double *,double *,const double *, double *); +void F77_dswap_base(FINT, double *, FINT, double *, FINT); +void F77_dcopy_base(FINT, const double *, FINT, double *, FINT); +void F77_daxpy_base(FINT, const double *, const double *, FINT, double *, FINT); +void F77_dswap_base(FINT, double *, FINT, double *, FINT); +void F77_dsdot_sub_base(FINT, const float *, FINT, const float *, FINT, double *); +void F77_ddot_sub_base(FINT, const double *, FINT, const double *, FINT, double *); +void F77_dscal_base(FINT, const double *, double *, FINT); +void F77_dnrm2_sub_base(FINT, const double *, FINT, double *); +void F77_dasum_sub_base(FINT, const double *, FINT, double *); +void F77_idamax_sub_base(FINT, const double * , FINT, FINT2); /* Single Complex Precision */ - void F77_crotg_base(void *, void *, float *, void *); - void F77_csrot_base(FINT, void *X, FINT, void *, FINT, const float *, const float *); - void F77_cswap_base( FINT, void *, FINT, void *, FINT); - void F77_ccopy_base( FINT, const void *, FINT, void *, FINT); - void F77_caxpy_base( FINT, const void *, const void *, FINT, void *, FINT); - void F77_cswap_base( FINT, void *, FINT, void *, FINT); - void F77_cdotc_sub_base( FINT, const void *, FINT, const void *, FINT, void *); - void F77_cdotu_sub_base( FINT, const void *, FINT, const void *, FINT, void *); - void F77_cscal_base( FINT, const void *, void *, FINT); - void F77_icamax_sub_base( FINT, const void *, FINT, FINT2); - void F77_csscal_base( FINT, const float *, void *, FINT); - void F77_scnrm2_sub_base( FINT, const void *, FINT, float *); - void F77_scasum_sub_base( FINT, const void *, FINT, float *); - void F77_scabs1_sub_base( const void *, float *); +void F77_crotg_base(void *, void *, float *, void *); +void F77_csrot_base(FINT, void *X, FINT, void *, FINT, const float *, const float *); +void F77_cswap_base(FINT, void *, FINT, void *, FINT); +void F77_ccopy_base(FINT, const void *, FINT, void *, FINT); +void F77_caxpy_base(FINT, const void *, const void *, FINT, void *, FINT); +void F77_cswap_base(FINT, void *, FINT, void *, FINT); +void F77_cdotc_sub_base(FINT, const void *, FINT, const void *, FINT, void *); +void F77_cdotu_sub_base(FINT, const void *, FINT, const void *, FINT, void *); +void F77_cscal_base(FINT, const void *, void *, FINT); +void F77_icamax_sub_base(FINT, const void *, FINT, FINT2); +void F77_csscal_base(FINT, const float *, void *, FINT); +void F77_scnrm2_sub_base(FINT, const void *, FINT, float *); +void F77_scasum_sub_base(FINT, const void *, FINT, float *); +void F77_scabs1_sub_base(const void *, float *); /* Double Complex Precision */ - void F77_zrotg_base(void *, void *, double *, void *); - void F77_zdrot_base(FINT, void *X, FINT, void *, FINT, const double *, const double *); - void F77_zswap_base( FINT, void *, FINT, void *, FINT); - void F77_zcopy_base( FINT, const void *, FINT, void *, FINT); - void F77_zaxpy_base( FINT, const void *, const void *, FINT, void *, FINT); - void F77_zswap_base( FINT, void *, FINT, void *, FINT); - void F77_zdotc_sub_base( FINT, const void *, FINT, const void *, FINT, void *); - void F77_zdotu_sub_base( FINT, const void *, FINT, const void *, FINT, void *); - void F77_zdscal_base( FINT, const double *, void *, FINT); - void F77_zscal_base( FINT, const void *, void *, FINT); - void F77_dznrm2_sub_base( FINT, const void *, FINT, double *); - void F77_dzasum_sub_base( FINT, const void *, FINT, double *); - void F77_izamax_sub_base( FINT, const void *, FINT, FINT2); - void F77_dcabs1_sub_base( const void *, double *); +void F77_zrotg_base(void *, void *, double *, void *); +void F77_zdrot_base(FINT, void *X, FINT, void *, FINT, const double *, const double *); +void F77_zswap_base(FINT, void *, FINT, void *, FINT); +void F77_zcopy_base(FINT, const void *, FINT, void *, FINT); +void F77_zaxpy_base(FINT, const void *, const void *, FINT, void *, FINT); +void F77_zswap_base(FINT, void *, FINT, void *, FINT); +void F77_zdotc_sub_base(FINT, const void *, FINT, const void *, FINT, void *); +void F77_zdotu_sub_base(FINT, const void *, FINT, const void *, FINT, void *); +void F77_zdscal_base(FINT, const double *, void *, FINT); +void F77_zscal_base(FINT, const void *, void *, FINT); +void F77_dznrm2_sub_base(FINT, const void *, FINT, double *); +void F77_dzasum_sub_base(FINT, const void *, FINT, double *); +void F77_izamax_sub_base(FINT, const void *, FINT, FINT2); +void F77_dcabs1_sub_base(const void *, double *); /* * Level 2 Fortran Prototypes @@ -631,321 +636,321 @@ void F77_xerbla_base(FCHAR, void * /* Single Precision */ - void F77_sgemv_base(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_sgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ssymv_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ssbmv_base(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_sspmv_base(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_strmv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_stbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_strsv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_stbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_stpmv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_stpsv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_sger_base( FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT); - void F77_ssyr_base(FCHAR, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_sspr_base(FCHAR, FINT, const float *, const float *, FINT, float * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_sspr2_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ssyr2_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +void F77_sgemv_base(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_sgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ssymv_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ssbmv_base(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_sspmv_base(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_strmv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_stbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_strsv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_stbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_stpmv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_stpsv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_sger_base(FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT); +void F77_ssyr_base(FCHAR, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_sspr_base(FCHAR, FINT, const float *, const float *, FINT, float * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_sspr2_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ssyr2_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); /* Double Precision */ - void F77_dgemv_base(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dsymv_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dsbmv_base(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dspmv_base(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dtrmv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtrsv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtpmv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtpsv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dger_base( FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT); - void F77_dsyr_base(FCHAR, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dspr_base(FCHAR, FINT, const double *, const double *, FINT, double * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dspr2_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dsyr2_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +void F77_dgemv_base(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dsymv_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dsbmv_base(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dspmv_base(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dtrmv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtrsv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtpmv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtpsv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dger_base(FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT); +void F77_dsyr_base(FCHAR, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dspr_base(FCHAR, FINT, const double *, const double *, FINT, double * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dspr2_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dsyr2_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); /* Single Complex Precision */ - void F77_cgemv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_cgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chemv_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chbmv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chpmv_base(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ctrmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctpmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctrsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctpsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_cgerc_base( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_cgeru_base( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_cher_base(FCHAR, FINT, const float *, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_cher2_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chpr_base(FCHAR, FINT, const float *, const void *, FINT, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chpr2_base(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +void F77_cgemv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_cgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chemv_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chbmv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chpmv_base(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ctrmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctpmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctrsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctpsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_cgerc_base(FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); +void F77_cgeru_base(FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); +void F77_cher_base(FCHAR, FINT, const float *, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_cher2_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chpr_base(FCHAR, FINT, const float *, const void *, FINT, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chpr2_base(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); /* Double Complex Precision */ - void F77_zgemv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhemv_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhbmv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhpmv_base(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ztrmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztpmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztrsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztpsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_zgerc_base( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_zgeru_base( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_zher_base(FCHAR, FINT, const double *, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zher2_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhpr_base(FCHAR, FINT, const double *, const void *, FINT, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhpr2_base(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +void F77_zgemv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhemv_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhbmv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhpmv_base(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ztrmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztpmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztrsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztpsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_zgerc_base(FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); +void F77_zgeru_base(FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); +void F77_zher_base(FCHAR, FINT, const double *, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zher2_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhpr_base(FCHAR, FINT, const double *, const void *, FINT, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhpr2_base(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); /* * Level 3 Fortran Prototypes @@ -953,165 +958,165 @@ void F77_xerbla_base(FCHAR, void * /* Single Precision */ - void F77_sgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ssymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ssyrk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ssyr2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_strmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); - void F77_strsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); +void F77_sgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ssymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ssyrk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ssyr2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_strmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); +void F77_strsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); /* Double Precision */ - void F77_dgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_dsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_dsyrk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_dsyr2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_dtrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); - void F77_dtrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); +void F77_dgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_dsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_dsyrk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_dsyr2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_dtrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); +void F77_dtrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); /* Single Complex Precision */ - void F77_cgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_csymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_chemm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_csyrk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_cherk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_csyr2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_cher2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ctrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); - void F77_ctrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); +void F77_cgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_csymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_chemm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_csyrk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_cherk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_csyr2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_cher2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ctrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); +void F77_ctrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); /* Double Complex Precision */ - void F77_zgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zhemm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zsyrk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zherk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zsyr2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zher2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ztrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); - void F77_ztrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); +void F77_zgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zhemm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zsyrk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zherk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zsyr2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zher2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ztrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); +void F77_ztrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); #ifdef __cplusplus } diff --git a/CBLAS/testing/CMakeLists.txt b/CBLAS/testing/CMakeLists.txt index 9b8cfaeb17..a2a41e05e3 100644 --- a/CBLAS/testing/CMakeLists.txt +++ b/CBLAS/testing/CMakeLists.txt @@ -52,6 +52,12 @@ if(BUILD_SINGLE) add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) + if(HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xscblat1 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xscblat2 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xscblat3 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + endif() + target_link_libraries(xscblat1 ${CBLASLIB}) target_link_libraries(xscblat2 ${CBLASLIB}) target_link_libraries(xscblat3 ${CBLASLIB}) @@ -66,6 +72,12 @@ if(BUILD_DOUBLE) add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) + if(HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xdcblat1 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xdcblat2 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xdcblat3 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + endif() + target_link_libraries(xdcblat1 ${CBLASLIB}) target_link_libraries(xdcblat2 ${CBLASLIB}) target_link_libraries(xdcblat3 ${CBLASLIB}) @@ -80,6 +92,12 @@ if(BUILD_COMPLEX) add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) + if(HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xccblat1 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xccblat2 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xccblat3 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + endif() + target_link_libraries(xccblat1 ${CBLASLIB} ${BLAS_LIBRARIES}) target_link_libraries(xccblat2 ${CBLASLIB}) target_link_libraries(xccblat3 ${CBLASLIB}) @@ -94,6 +112,12 @@ if(BUILD_COMPLEX16) add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) + if(HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xzcblat1 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xzcblat2 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xzcblat3 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + endif() + target_link_libraries(xzcblat1 ${CBLASLIB}) target_link_libraries(xzcblat2 ${CBLASLIB}) target_link_libraries(xzcblat3 ${CBLASLIB}) From 615727d1d485237e631da3777ec2c7b9ac00d43d Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sat, 19 Nov 2022 23:18:45 +0100 Subject: [PATCH 71/90] Added option to enable flat namespace on macOS. --- CMakeLists.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 68a7767b8d..29e489ffee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -207,6 +207,22 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL Compaq) endif() endif() +# Add option to enable flat namespace for symbol resolution on macOS +if(APPLE) + option(USE_FLAT_NAMESPACE "Use flat namespaces for symbol resolution during build and runtime." OFF) + + if(USE_FLAT_NAMESPACE) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-flat_namespace") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-flat_namespace") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-flat_namespace") + else() + if(BUILD_SHARED_LIBS AND BUILD_TESTING) + message(WARNING + "LAPACK test suite might fail with shared libraries and the default two-level namespace. " + "Disable shared libraries or enable flat namespace for symbol resolution via -DUSE_FLAT_NAMESPACE=ON.") + endif() + endif() +endif() # -------------------------------------------------- set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKLIB}-targets) From 9448b7ea99f6dfaa7429f0ec15a12d690d503f56 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sat, 19 Nov 2022 23:36:27 +0100 Subject: [PATCH 72/90] Enabled flat namespace in macOS travis builds. --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 71d5bd8437..9225103ade 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,7 +29,9 @@ matrix: - os: osx name: "CMake Release Test on macOS Big Sur" osx_image: xcode12.3 - env: CMAKE_BUILD_TYPE=Release + env: + - CMAKE_BUILD_TYPE=Release + - ADDITIONAL_CMAKE_OPTIONS="-DUSE_FLAT_NAMESPACE:BOOL=ON" - os: osx osx_image: xcode12.3 name: "Makefile Test on on macOS Big Sur" @@ -62,6 +64,7 @@ script: -DBUILD_SHARED_LIBS:BOOL=ON -DCMAKE_Fortran_FLAGS:STRING="-fimplicit-none -frecursive -fcheck=all" -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + ${ADDITIONAL_CMAKE_OPTIONS} ${SRC_DIR} - ctest -D ExperimentalStart - ctest -D ExperimentalConfigure From 05043da7a6a748e3c3b11f310ca7819c7604eac1 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sat, 19 Nov 2022 23:56:42 +0100 Subject: [PATCH 73/90] Formatting. --- CBLAS/include/cblas_f77.h | 294 +++++++++++++++++++------------------- 1 file changed, 147 insertions(+), 147 deletions(-) diff --git a/CBLAS/include/cblas_f77.h b/CBLAS/include/cblas_f77.h index 283c50c520..9e8fbb6da7 100644 --- a/CBLAS/include/cblas_f77.h +++ b/CBLAS/include/cblas_f77.h @@ -46,162 +46,162 @@ * Level 1 BLAS */ -#define F77_xerbla_base F77_GLOBAL(xerbla,XERBLA) -#define F77_srotg_base F77_GLOBAL(srotg,SROTG) -#define F77_srotmg_base F77_GLOBAL(srotmg,SROTMG) -#define F77_srot_base F77_GLOBAL(srot,SROT) -#define F77_srotm_base F77_GLOBAL(srotm,SROTM) -#define F77_drotg_base F77_GLOBAL(drotg,DROTG) -#define F77_drotmg_base F77_GLOBAL(drotmg,DROTMG) -#define F77_drot_base F77_GLOBAL(drot,DROT) -#define F77_drotm_base F77_GLOBAL(drotm,DROTM) -#define F77_sswap_base F77_GLOBAL(sswap,SSWAP) -#define F77_scopy_base F77_GLOBAL(scopy,SCOPY) -#define F77_saxpy_base F77_GLOBAL(saxpy,SAXPY) -#define F77_isamax_sub_base F77_GLOBAL(isamaxsub,ISAMAXSUB) -#define F77_dswap_base F77_GLOBAL(dswap,DSWAP) -#define F77_dcopy_base F77_GLOBAL(dcopy,DCOPY) -#define F77_daxpy_base F77_GLOBAL(daxpy,DAXPY) -#define F77_idamax_sub_base F77_GLOBAL(idamaxsub,IDAMAXSUB) -#define F77_cswap_base F77_GLOBAL(cswap,CSWAP) -#define F77_ccopy_base F77_GLOBAL(ccopy,CCOPY) -#define F77_caxpy_base F77_GLOBAL(caxpy,CAXPY) -#define F77_icamax_sub_base F77_GLOBAL(icamaxsub,ICAMAXSUB) -#define F77_zswap_base F77_GLOBAL(zswap,ZSWAP) -#define F77_zcopy_base F77_GLOBAL(zcopy,ZCOPY) -#define F77_zaxpy_base F77_GLOBAL(zaxpy,ZAXPY) -#define F77_izamax_sub_base F77_GLOBAL(izamaxsub,IZAMAXSUB) -#define F77_sdot_sub_base F77_GLOBAL(sdotsub,SDOTSUB) -#define F77_ddot_sub_base F77_GLOBAL(ddotsub,DDOTSUB) -#define F77_dsdot_sub_base F77_GLOBAL(dsdotsub,DSDOTSUB) -#define F77_sscal_base F77_GLOBAL(sscal,SSCAL) -#define F77_dscal_base F77_GLOBAL(dscal,DSCAL) -#define F77_cscal_base F77_GLOBAL(cscal,CSCAL) -#define F77_zscal_base F77_GLOBAL(zscal,ZSCAL) -#define F77_csscal_base F77_GLOBAL(csscal,CSSCAL) -#define F77_zdscal_base F77_GLOBAL(zdscal,ZDSCAL) -#define F77_cdotu_sub_base F77_GLOBAL(cdotusub,CDOTUSUB) -#define F77_cdotc_sub_base F77_GLOBAL(cdotcsub,CDOTCSUB) -#define F77_zdotu_sub_base F77_GLOBAL(zdotusub,ZDOTUSUB) -#define F77_zdotc_sub_base F77_GLOBAL(zdotcsub,ZDOTCSUB) -#define F77_snrm2_sub_base F77_GLOBAL(snrm2sub,SNRM2SUB) -#define F77_sasum_sub_base F77_GLOBAL(sasumsub,SASUMSUB) -#define F77_dnrm2_sub_base F77_GLOBAL(dnrm2sub,DNRM2SUB) -#define F77_dasum_sub_base F77_GLOBAL(dasumsub,DASUMSUB) -#define F77_scnrm2_sub_base F77_GLOBAL(scnrm2sub,SCNRM2SUB) -#define F77_scasum_sub_base F77_GLOBAL(scasumsub,SCASUMSUB) -#define F77_dznrm2_sub_base F77_GLOBAL(dznrm2sub,DZNRM2SUB) -#define F77_dzasum_sub_base F77_GLOBAL(dzasumsub,DZASUMSUB) -#define F77_sdsdot_sub_base F77_GLOBAL(sdsdotsub,SDSDOTSUB) -#define F77_crotg_base F77_GLOBAL(crotg, CROTG) -#define F77_csrot_base F77_GLOBAL(csrot, CSROT) -#define F77_zrotg_base F77_GLOBAL(zrotg, ZROTG) -#define F77_zdrot_base F77_GLOBAL(zdrot, ZDROT) +#define F77_xerbla_base F77_GLOBAL(xerbla,XERBLA) +#define F77_srotg_base F77_GLOBAL(srotg,SROTG) +#define F77_srotmg_base F77_GLOBAL(srotmg,SROTMG) +#define F77_srot_base F77_GLOBAL(srot,SROT) +#define F77_srotm_base F77_GLOBAL(srotm,SROTM) +#define F77_drotg_base F77_GLOBAL(drotg,DROTG) +#define F77_drotmg_base F77_GLOBAL(drotmg,DROTMG) +#define F77_drot_base F77_GLOBAL(drot,DROT) +#define F77_drotm_base F77_GLOBAL(drotm,DROTM) +#define F77_sswap_base F77_GLOBAL(sswap,SSWAP) +#define F77_scopy_base F77_GLOBAL(scopy,SCOPY) +#define F77_saxpy_base F77_GLOBAL(saxpy,SAXPY) +#define F77_isamax_sub_base F77_GLOBAL(isamaxsub,ISAMAXSUB) +#define F77_dswap_base F77_GLOBAL(dswap,DSWAP) +#define F77_dcopy_base F77_GLOBAL(dcopy,DCOPY) +#define F77_daxpy_base F77_GLOBAL(daxpy,DAXPY) +#define F77_idamax_sub_base F77_GLOBAL(idamaxsub,IDAMAXSUB) +#define F77_cswap_base F77_GLOBAL(cswap,CSWAP) +#define F77_ccopy_base F77_GLOBAL(ccopy,CCOPY) +#define F77_caxpy_base F77_GLOBAL(caxpy,CAXPY) +#define F77_icamax_sub_base F77_GLOBAL(icamaxsub,ICAMAXSUB) +#define F77_zswap_base F77_GLOBAL(zswap,ZSWAP) +#define F77_zcopy_base F77_GLOBAL(zcopy,ZCOPY) +#define F77_zaxpy_base F77_GLOBAL(zaxpy,ZAXPY) +#define F77_izamax_sub_base F77_GLOBAL(izamaxsub,IZAMAXSUB) +#define F77_sdot_sub_base F77_GLOBAL(sdotsub,SDOTSUB) +#define F77_ddot_sub_base F77_GLOBAL(ddotsub,DDOTSUB) +#define F77_dsdot_sub_base F77_GLOBAL(dsdotsub,DSDOTSUB) +#define F77_sscal_base F77_GLOBAL(sscal,SSCAL) +#define F77_dscal_base F77_GLOBAL(dscal,DSCAL) +#define F77_cscal_base F77_GLOBAL(cscal,CSCAL) +#define F77_zscal_base F77_GLOBAL(zscal,ZSCAL) +#define F77_csscal_base F77_GLOBAL(csscal,CSSCAL) +#define F77_zdscal_base F77_GLOBAL(zdscal,ZDSCAL) +#define F77_cdotu_sub_base F77_GLOBAL(cdotusub,CDOTUSUB) +#define F77_cdotc_sub_base F77_GLOBAL(cdotcsub,CDOTCSUB) +#define F77_zdotu_sub_base F77_GLOBAL(zdotusub,ZDOTUSUB) +#define F77_zdotc_sub_base F77_GLOBAL(zdotcsub,ZDOTCSUB) +#define F77_snrm2_sub_base F77_GLOBAL(snrm2sub,SNRM2SUB) +#define F77_sasum_sub_base F77_GLOBAL(sasumsub,SASUMSUB) +#define F77_dnrm2_sub_base F77_GLOBAL(dnrm2sub,DNRM2SUB) +#define F77_dasum_sub_base F77_GLOBAL(dasumsub,DASUMSUB) +#define F77_scnrm2_sub_base F77_GLOBAL(scnrm2sub,SCNRM2SUB) +#define F77_scasum_sub_base F77_GLOBAL(scasumsub,SCASUMSUB) +#define F77_dznrm2_sub_base F77_GLOBAL(dznrm2sub,DZNRM2SUB) +#define F77_dzasum_sub_base F77_GLOBAL(dzasumsub,DZASUMSUB) +#define F77_sdsdot_sub_base F77_GLOBAL(sdsdotsub,SDSDOTSUB) +#define F77_crotg_base F77_GLOBAL(crotg, CROTG) +#define F77_csrot_base F77_GLOBAL(csrot, CSROT) +#define F77_zrotg_base F77_GLOBAL(zrotg, ZROTG) +#define F77_zdrot_base F77_GLOBAL(zdrot, ZDROT) #define F77_scabs1_sub_base F77_GLOBAL(scabs1sub, SCABS1SUB) #define F77_dcabs1_sub_base F77_GLOBAL(dcabs1sub, DCABS1SUB) /* * Level 2 BLAS */ -#define F77_ssymv_base F77_GLOBAL(ssymv,SSYMV) -#define F77_ssbmv_base F77_GLOBAL(ssbmv,SSBMV) -#define F77_sspmv_base F77_GLOBAL(sspmv,SSPMV) -#define F77_sger_base F77_GLOBAL(sger,SGER) -#define F77_ssyr_base F77_GLOBAL(ssyr,SSYR) -#define F77_sspr_base F77_GLOBAL(sspr,SSPR) -#define F77_ssyr2_base F77_GLOBAL(ssyr2,SSYR2) -#define F77_sspr2_base F77_GLOBAL(sspr2,SSPR2) -#define F77_dsymv_base F77_GLOBAL(dsymv,DSYMV) -#define F77_dsbmv_base F77_GLOBAL(dsbmv,DSBMV) -#define F77_dspmv_base F77_GLOBAL(dspmv,DSPMV) -#define F77_dger_base F77_GLOBAL(dger,DGER) -#define F77_dsyr_base F77_GLOBAL(dsyr,DSYR) -#define F77_dspr_base F77_GLOBAL(dspr,DSPR) -#define F77_dsyr2_base F77_GLOBAL(dsyr2,DSYR2) -#define F77_dspr2_base F77_GLOBAL(dspr2,DSPR2) -#define F77_chemv_base F77_GLOBAL(chemv,CHEMV) -#define F77_chbmv_base F77_GLOBAL(chbmv,CHBMV) -#define F77_chpmv_base F77_GLOBAL(chpmv,CHPMV) -#define F77_cgeru_base F77_GLOBAL(cgeru,CGERU) -#define F77_cgerc_base F77_GLOBAL(cgerc,CGERC) -#define F77_cher_base F77_GLOBAL(cher,CHER) -#define F77_chpr_base F77_GLOBAL(chpr,CHPR) -#define F77_cher2_base F77_GLOBAL(cher2,CHER2) -#define F77_chpr2_base F77_GLOBAL(chpr2,CHPR2) -#define F77_zhemv_base F77_GLOBAL(zhemv,ZHEMV) -#define F77_zhbmv_base F77_GLOBAL(zhbmv,ZHBMV) -#define F77_zhpmv_base F77_GLOBAL(zhpmv,ZHPMV) -#define F77_zgeru_base F77_GLOBAL(zgeru,ZGERU) -#define F77_zgerc_base F77_GLOBAL(zgerc,ZGERC) -#define F77_zher_base F77_GLOBAL(zher,ZHER) -#define F77_zhpr_base F77_GLOBAL(zhpr,ZHPR) -#define F77_zher2_base F77_GLOBAL(zher2,ZHER2) -#define F77_zhpr2_base F77_GLOBAL(zhpr2,ZHPR2) -#define F77_sgemv_base F77_GLOBAL(sgemv,SGEMV) -#define F77_sgbmv_base F77_GLOBAL(sgbmv,SGBMV) -#define F77_strmv_base F77_GLOBAL(strmv,STRMV) -#define F77_stbmv_base F77_GLOBAL(stbmv,STBMV) -#define F77_stpmv_base F77_GLOBAL(stpmv,STPMV) -#define F77_strsv_base F77_GLOBAL(strsv,STRSV) -#define F77_stbsv_base F77_GLOBAL(stbsv,STBSV) -#define F77_stpsv_base F77_GLOBAL(stpsv,STPSV) -#define F77_dgemv_base F77_GLOBAL(dgemv,DGEMV) -#define F77_dgbmv_base F77_GLOBAL(dgbmv,DGBMV) -#define F77_dtrmv_base F77_GLOBAL(dtrmv,DTRMV) -#define F77_dtbmv_base F77_GLOBAL(dtbmv,DTBMV) -#define F77_dtpmv_base F77_GLOBAL(dtpmv,DTPMV) -#define F77_dtrsv_base F77_GLOBAL(dtrsv,DTRSV) -#define F77_dtbsv_base F77_GLOBAL(dtbsv,DTBSV) -#define F77_dtpsv_base F77_GLOBAL(dtpsv,DTPSV) -#define F77_cgemv_base F77_GLOBAL(cgemv,CGEMV) -#define F77_cgbmv_base F77_GLOBAL(cgbmv,CGBMV) -#define F77_ctrmv_base F77_GLOBAL(ctrmv,CTRMV) -#define F77_ctbmv_base F77_GLOBAL(ctbmv,CTBMV) -#define F77_ctpmv_base F77_GLOBAL(ctpmv,CTPMV) -#define F77_ctrsv_base F77_GLOBAL(ctrsv,CTRSV) -#define F77_ctbsv_base F77_GLOBAL(ctbsv,CTBSV) -#define F77_ctpsv_base F77_GLOBAL(ctpsv,CTPSV) -#define F77_zgemv_base F77_GLOBAL(zgemv,ZGEMV) -#define F77_zgbmv_base F77_GLOBAL(zgbmv,ZGBMV) -#define F77_ztrmv_base F77_GLOBAL(ztrmv,ZTRMV) -#define F77_ztbmv_base F77_GLOBAL(ztbmv,ZTBMV) -#define F77_ztpmv_base F77_GLOBAL(ztpmv,ZTPMV) -#define F77_ztrsv_base F77_GLOBAL(ztrsv,ZTRSV) -#define F77_ztbsv_base F77_GLOBAL(ztbsv,ZTBSV) -#define F77_ztpsv_base F77_GLOBAL(ztpsv,ZTPSV) +#define F77_ssymv_base F77_GLOBAL(ssymv,SSYMV) +#define F77_ssbmv_base F77_GLOBAL(ssbmv,SSBMV) +#define F77_sspmv_base F77_GLOBAL(sspmv,SSPMV) +#define F77_sger_base F77_GLOBAL(sger,SGER) +#define F77_ssyr_base F77_GLOBAL(ssyr,SSYR) +#define F77_sspr_base F77_GLOBAL(sspr,SSPR) +#define F77_ssyr2_base F77_GLOBAL(ssyr2,SSYR2) +#define F77_sspr2_base F77_GLOBAL(sspr2,SSPR2) +#define F77_dsymv_base F77_GLOBAL(dsymv,DSYMV) +#define F77_dsbmv_base F77_GLOBAL(dsbmv,DSBMV) +#define F77_dspmv_base F77_GLOBAL(dspmv,DSPMV) +#define F77_dger_base F77_GLOBAL(dger,DGER) +#define F77_dsyr_base F77_GLOBAL(dsyr,DSYR) +#define F77_dspr_base F77_GLOBAL(dspr,DSPR) +#define F77_dsyr2_base F77_GLOBAL(dsyr2,DSYR2) +#define F77_dspr2_base F77_GLOBAL(dspr2,DSPR2) +#define F77_chemv_base F77_GLOBAL(chemv,CHEMV) +#define F77_chbmv_base F77_GLOBAL(chbmv,CHBMV) +#define F77_chpmv_base F77_GLOBAL(chpmv,CHPMV) +#define F77_cgeru_base F77_GLOBAL(cgeru,CGERU) +#define F77_cgerc_base F77_GLOBAL(cgerc,CGERC) +#define F77_cher_base F77_GLOBAL(cher,CHER) +#define F77_chpr_base F77_GLOBAL(chpr,CHPR) +#define F77_cher2_base F77_GLOBAL(cher2,CHER2) +#define F77_chpr2_base F77_GLOBAL(chpr2,CHPR2) +#define F77_zhemv_base F77_GLOBAL(zhemv,ZHEMV) +#define F77_zhbmv_base F77_GLOBAL(zhbmv,ZHBMV) +#define F77_zhpmv_base F77_GLOBAL(zhpmv,ZHPMV) +#define F77_zgeru_base F77_GLOBAL(zgeru,ZGERU) +#define F77_zgerc_base F77_GLOBAL(zgerc,ZGERC) +#define F77_zher_base F77_GLOBAL(zher,ZHER) +#define F77_zhpr_base F77_GLOBAL(zhpr,ZHPR) +#define F77_zher2_base F77_GLOBAL(zher2,ZHER2) +#define F77_zhpr2_base F77_GLOBAL(zhpr2,ZHPR2) +#define F77_sgemv_base F77_GLOBAL(sgemv,SGEMV) +#define F77_sgbmv_base F77_GLOBAL(sgbmv,SGBMV) +#define F77_strmv_base F77_GLOBAL(strmv,STRMV) +#define F77_stbmv_base F77_GLOBAL(stbmv,STBMV) +#define F77_stpmv_base F77_GLOBAL(stpmv,STPMV) +#define F77_strsv_base F77_GLOBAL(strsv,STRSV) +#define F77_stbsv_base F77_GLOBAL(stbsv,STBSV) +#define F77_stpsv_base F77_GLOBAL(stpsv,STPSV) +#define F77_dgemv_base F77_GLOBAL(dgemv,DGEMV) +#define F77_dgbmv_base F77_GLOBAL(dgbmv,DGBMV) +#define F77_dtrmv_base F77_GLOBAL(dtrmv,DTRMV) +#define F77_dtbmv_base F77_GLOBAL(dtbmv,DTBMV) +#define F77_dtpmv_base F77_GLOBAL(dtpmv,DTPMV) +#define F77_dtrsv_base F77_GLOBAL(dtrsv,DTRSV) +#define F77_dtbsv_base F77_GLOBAL(dtbsv,DTBSV) +#define F77_dtpsv_base F77_GLOBAL(dtpsv,DTPSV) +#define F77_cgemv_base F77_GLOBAL(cgemv,CGEMV) +#define F77_cgbmv_base F77_GLOBAL(cgbmv,CGBMV) +#define F77_ctrmv_base F77_GLOBAL(ctrmv,CTRMV) +#define F77_ctbmv_base F77_GLOBAL(ctbmv,CTBMV) +#define F77_ctpmv_base F77_GLOBAL(ctpmv,CTPMV) +#define F77_ctrsv_base F77_GLOBAL(ctrsv,CTRSV) +#define F77_ctbsv_base F77_GLOBAL(ctbsv,CTBSV) +#define F77_ctpsv_base F77_GLOBAL(ctpsv,CTPSV) +#define F77_zgemv_base F77_GLOBAL(zgemv,ZGEMV) +#define F77_zgbmv_base F77_GLOBAL(zgbmv,ZGBMV) +#define F77_ztrmv_base F77_GLOBAL(ztrmv,ZTRMV) +#define F77_ztbmv_base F77_GLOBAL(ztbmv,ZTBMV) +#define F77_ztpmv_base F77_GLOBAL(ztpmv,ZTPMV) +#define F77_ztrsv_base F77_GLOBAL(ztrsv,ZTRSV) +#define F77_ztbsv_base F77_GLOBAL(ztbsv,ZTBSV) +#define F77_ztpsv_base F77_GLOBAL(ztpsv,ZTPSV) /* * Level 3 BLAS */ -#define F77_chemm_base F77_GLOBAL(chemm,CHEMM) -#define F77_cherk_base F77_GLOBAL(cherk,CHERK) -#define F77_cher2k_base F77_GLOBAL(cher2k,CHER2K) -#define F77_zhemm_base F77_GLOBAL(zhemm,ZHEMM) -#define F77_zherk_base F77_GLOBAL(zherk,ZHERK) -#define F77_zher2k_base F77_GLOBAL(zher2k,ZHER2K) -#define F77_sgemm_base F77_GLOBAL(sgemm,SGEMM) -#define F77_ssymm_base F77_GLOBAL(ssymm,SSYMM) -#define F77_ssyrk_base F77_GLOBAL(ssyrk,SSYRK) -#define F77_ssyr2k_base F77_GLOBAL(ssyr2k,SSYR2K) -#define F77_strmm_base F77_GLOBAL(strmm,STRMM) -#define F77_strsm_base F77_GLOBAL(strsm,STRSM) -#define F77_dgemm_base F77_GLOBAL(dgemm,DGEMM) -#define F77_dsymm_base F77_GLOBAL(dsymm,DSYMM) -#define F77_dsyrk_base F77_GLOBAL(dsyrk,DSYRK) -#define F77_dsyr2k_base F77_GLOBAL(dsyr2k,DSYR2K) -#define F77_dtrmm_base F77_GLOBAL(dtrmm,DTRMM) -#define F77_dtrsm_base F77_GLOBAL(dtrsm,DTRSM) -#define F77_cgemm_base F77_GLOBAL(cgemm,CGEMM) -#define F77_csymm_base F77_GLOBAL(csymm,CSYMM) -#define F77_csyrk_base F77_GLOBAL(csyrk,CSYRK) -#define F77_csyr2k_base F77_GLOBAL(csyr2k,CSYR2K) -#define F77_ctrmm_base F77_GLOBAL(ctrmm,CTRMM) -#define F77_ctrsm_base F77_GLOBAL(ctrsm,CTRSM) -#define F77_zgemm_base F77_GLOBAL(zgemm,ZGEMM) -#define F77_zsymm_base F77_GLOBAL(zsymm,ZSYMM) -#define F77_zsyrk_base F77_GLOBAL(zsyrk,ZSYRK) -#define F77_zsyr2k_base F77_GLOBAL(zsyr2k,ZSYR2K) -#define F77_ztrmm_base F77_GLOBAL(ztrmm,ZTRMM) -#define F77_ztrsm_base F77_GLOBAL(ztrsm,ZTRSM) +#define F77_chemm_base F77_GLOBAL(chemm,CHEMM) +#define F77_cherk_base F77_GLOBAL(cherk,CHERK) +#define F77_cher2k_base F77_GLOBAL(cher2k,CHER2K) +#define F77_zhemm_base F77_GLOBAL(zhemm,ZHEMM) +#define F77_zherk_base F77_GLOBAL(zherk,ZHERK) +#define F77_zher2k_base F77_GLOBAL(zher2k,ZHER2K) +#define F77_sgemm_base F77_GLOBAL(sgemm,SGEMM) +#define F77_ssymm_base F77_GLOBAL(ssymm,SSYMM) +#define F77_ssyrk_base F77_GLOBAL(ssyrk,SSYRK) +#define F77_ssyr2k_base F77_GLOBAL(ssyr2k,SSYR2K) +#define F77_strmm_base F77_GLOBAL(strmm,STRMM) +#define F77_strsm_base F77_GLOBAL(strsm,STRSM) +#define F77_dgemm_base F77_GLOBAL(dgemm,DGEMM) +#define F77_dsymm_base F77_GLOBAL(dsymm,DSYMM) +#define F77_dsyrk_base F77_GLOBAL(dsyrk,DSYRK) +#define F77_dsyr2k_base F77_GLOBAL(dsyr2k,DSYR2K) +#define F77_dtrmm_base F77_GLOBAL(dtrmm,DTRMM) +#define F77_dtrsm_base F77_GLOBAL(dtrsm,DTRSM) +#define F77_cgemm_base F77_GLOBAL(cgemm,CGEMM) +#define F77_csymm_base F77_GLOBAL(csymm,CSYMM) +#define F77_csyrk_base F77_GLOBAL(csyrk,CSYRK) +#define F77_csyr2k_base F77_GLOBAL(csyr2k,CSYR2K) +#define F77_ctrmm_base F77_GLOBAL(ctrmm,CTRMM) +#define F77_ctrsm_base F77_GLOBAL(ctrsm,CTRSM) +#define F77_zgemm_base F77_GLOBAL(zgemm,ZGEMM) +#define F77_zsymm_base F77_GLOBAL(zsymm,ZSYMM) +#define F77_zsyrk_base F77_GLOBAL(zsyrk,ZSYRK) +#define F77_zsyr2k_base F77_GLOBAL(zsyr2k,ZSYR2K) +#define F77_ztrmm_base F77_GLOBAL(ztrmm,ZTRMM) +#define F77_ztrsm_base F77_GLOBAL(ztrsm,ZTRSM) /* * Level 1 Fortran variadic definitions From 12f2f0ab3738841d59ec69248005363d21c85c56 Mon Sep 17 00:00:00 2001 From: Simon Maertens Date: Sun, 20 Nov 2022 00:22:20 +0100 Subject: [PATCH 74/90] Enabled flat namespace in macOS github workflows. --- .github/workflows/cmake.yml | 1 + .travis.yml | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index dee07d154a..e9b6d1cff8 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -75,6 +75,7 @@ jobs: cmake -B build -D CMAKE_C_COMPILER="gcc-11" -D CMAKE_Fortran_COMPILER="gfortran-11" + -D USE_FLAT_NAMESPACE:BOOL=ON # - name: Use Unix Makefiles on Windows # if: ${{ matrix.os == 'windows-latest' }} diff --git a/.travis.yml b/.travis.yml index 9225103ade..71d5bd8437 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,9 +29,7 @@ matrix: - os: osx name: "CMake Release Test on macOS Big Sur" osx_image: xcode12.3 - env: - - CMAKE_BUILD_TYPE=Release - - ADDITIONAL_CMAKE_OPTIONS="-DUSE_FLAT_NAMESPACE:BOOL=ON" + env: CMAKE_BUILD_TYPE=Release - os: osx osx_image: xcode12.3 name: "Makefile Test on on macOS Big Sur" @@ -64,7 +62,6 @@ script: -DBUILD_SHARED_LIBS:BOOL=ON -DCMAKE_Fortran_FLAGS:STRING="-fimplicit-none -frecursive -fcheck=all" -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - ${ADDITIONAL_CMAKE_OPTIONS} ${SRC_DIR} - ctest -D ExperimentalStart - ctest -D ExperimentalConfigure From 7f09f40bf4259ce973f0e72fa2b751da39b34443 Mon Sep 17 00:00:00 2001 From: Bart Oldeman Date: Tue, 22 Nov 2022 01:08:27 +0000 Subject: [PATCH 75/90] Fix SLATRS3 and CLATRS3 tests Some SLATRS3 tests were failing with OpenBLAS where the RHS has a BIGNUM (=Infinity in this test) component. OpenBLAS SSCAL, when multiplying with 0, will turn this component into 0, unlike reference BLAS which turns it into NaN. While NaN looks more correct it still did not explain why DLATRS3 tests were succeeding where DSCAL has the same difference between reference and OpenBLAS. Diffing the test code unmasked a few bugs, which makes the test succeed and also unmasked a bug in the CLATRS3 test. The ZLATRS3 and DLATRS3 tests look correct to me. --- TESTING/LIN/cchktr.f | 6 +++--- TESTING/LIN/schktr.f | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/TESTING/LIN/cchktr.f b/TESTING/LIN/cchktr.f index c55b076437..4b09361d8b 100644 --- a/TESTING/LIN/cchktr.f +++ b/TESTING/LIN/cchktr.f @@ -541,7 +541,7 @@ SUBROUTINE CCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, * SRNAMT = 'CLATRS3' CALL CCOPY( N, X, 1, B, 1 ) - CALL CCOPY( N, X, 1, B, 1 ) + CALL CCOPY( N, X, 1, B( N+1 ), 1 ) CALL CSCAL( N, BIGNUM, B( N+1 ), 1 ) CALL CLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA, $ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX, @@ -551,7 +551,7 @@ SUBROUTINE CCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, * IF( INFO.NE.0 ) $ CALL ALAERH( PATH, 'CLATRS3', INFO, 0, - $ UPLO // TRANS // DIAG // 'Y', N, N, + $ UPLO // TRANS // DIAG // 'N', N, N, $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, $ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA, @@ -559,7 +559,7 @@ SUBROUTINE CCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, CALL CSSCAL( N, BIGNUM, X, 1 ) CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, $ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA, - $ X, LDA, WORK, RESULT( 10 ) ) + $ X, LDA, WORK, RES ) RESULT( 10 ) = MAX( RESULT( 10 ), RES ) * * Print information about the tests that did not pass diff --git a/TESTING/LIN/schktr.f b/TESTING/LIN/schktr.f index 5aeb1ce88c..92d8761087 100644 --- a/TESTING/LIN/schktr.f +++ b/TESTING/LIN/schktr.f @@ -555,11 +555,11 @@ SUBROUTINE SCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, * IF( INFO.NE.0 ) $ CALL ALAERH( PATH, 'SLATRS3', INFO, 0, - $ UPLO // TRANS // DIAG // 'Y', N, N, + $ UPLO // TRANS // DIAG // 'N', N, N, $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) * CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, - $ SCALE3 ( 1 ), RWORK, ONE, B( N+1 ), LDA, + $ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA, $ X, LDA, WORK, RESULT( 10 ) ) CALL SSCAL( N, BIGNUM, X, 1 ) CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, From 5d8a3a804befd179a28c5e4c7845e982b8d1922e Mon Sep 17 00:00:00 2001 From: Weslley S Pereira Date: Tue, 22 Nov 2022 18:19:33 -0700 Subject: [PATCH 76/90] Check for NaNs in xGECON --- SRC/cgecon.f | 7 ++++--- SRC/dgecon.f | 7 ++++--- SRC/sgecon.f | 7 ++++--- SRC/zgecon.f | 7 ++++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/SRC/cgecon.f b/SRC/cgecon.f index 48f409b680..6f426c2ab6 100644 --- a/SRC/cgecon.f +++ b/SRC/cgecon.f @@ -106,6 +106,7 @@ *> INFO is INTEGER *> = 0: successful exit *> < 0: if INFO = -i, the i-th argument had an illegal value +*> =-5: if ANORM is NAN or negative. *> \endverbatim * * Authors: @@ -153,10 +154,10 @@ SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, INTEGER ISAVE( 3 ) * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, SISNAN INTEGER ICAMAX REAL SLAMCH - EXTERNAL LSAME, ICAMAX, SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH, SISNAN * .. * .. External Subroutines .. EXTERNAL CLACN2, CLATRS, CSRSCL, XERBLA @@ -182,7 +183,7 @@ SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO ) THEN + ELSE IF( ANORM.LT.ZERO .OR. SISNAN( ANORM ) ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN diff --git a/SRC/dgecon.f b/SRC/dgecon.f index aa10dee9a2..1ad302ae3f 100644 --- a/SRC/dgecon.f +++ b/SRC/dgecon.f @@ -106,6 +106,7 @@ *> INFO is INTEGER *> = 0: successful exit *> < 0: if INFO = -i, the i-th argument had an illegal value +*> =-5: if ANORM is NAN or negative. *> \endverbatim * * Authors: @@ -152,10 +153,10 @@ SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, INTEGER ISAVE( 3 ) * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, DISNAN INTEGER IDAMAX DOUBLE PRECISION DLAMCH - EXTERNAL LSAME, IDAMAX, DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH, DISNAN * .. * .. External Subroutines .. EXTERNAL DLACN2, DLATRS, DRSCL, XERBLA @@ -175,7 +176,7 @@ SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO ) THEN + ELSE IF( ANORM.LT.ZERO .OR. DISNAN( ANORM ) ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN diff --git a/SRC/sgecon.f b/SRC/sgecon.f index a284b094be..86aeea73bb 100644 --- a/SRC/sgecon.f +++ b/SRC/sgecon.f @@ -106,6 +106,7 @@ *> INFO is INTEGER *> = 0: successful exit *> < 0: if INFO = -i, the i-th argument had an illegal value +*> =-5: if ANORM is NAN or negative. *> \endverbatim * * Authors: @@ -152,10 +153,10 @@ SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, INTEGER ISAVE( 3 ) * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, SISNAN INTEGER ISAMAX REAL SLAMCH - EXTERNAL LSAME, ISAMAX, SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH, SISNAN * .. * .. External Subroutines .. EXTERNAL SLACN2, SLATRS, SRSCL, XERBLA @@ -175,7 +176,7 @@ SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO ) THEN + ELSE IF( ANORM.LT.ZERO .OR. SISNAN( ANORM ) ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN diff --git a/SRC/zgecon.f b/SRC/zgecon.f index 3d3127f9df..9cbfe35bcd 100644 --- a/SRC/zgecon.f +++ b/SRC/zgecon.f @@ -106,6 +106,7 @@ *> INFO is INTEGER *> = 0: successful exit *> < 0: if INFO = -i, the i-th argument had an illegal value +*> =-5: if ANORM is NAN or negative. *> \endverbatim * * Authors: @@ -153,10 +154,10 @@ SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, INTEGER ISAVE( 3 ) * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, DISNAN INTEGER IZAMAX DOUBLE PRECISION DLAMCH - EXTERNAL LSAME, IZAMAX, DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH, DISNAN * .. * .. External Subroutines .. EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATRS @@ -182,7 +183,7 @@ SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO ) THEN + ELSE IF( ANORM.LT.ZERO .OR. DISNAN( ANORM ) ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN From 776a0223b0d52ec588f4a1f8b2cae0586ffa4330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Sun, 27 Nov 2022 13:18:46 +0100 Subject: [PATCH 77/90] Fix CBLAS tests with 64-bit indexing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch replaces all instances of “int” by “CBLAS_INT” in CBLAS/testing/*.c (except for RowMajorStrg which has must remain an “int”, since it is declared as such in CBLAS main code). Interestingly, the bug would not manifest on low endian architectures, because interpreting an int64_t pointer as an int32_t pointer works there (as long as the integers fits in 31 bits, which is the case here). But on big endian architectures, this of course fails badly. --- CBLAS/testing/c_c2chke.c | 10 +-- CBLAS/testing/c_c3chke.c | 10 +-- CBLAS/testing/c_cblas1.c | 34 +++++----- CBLAS/testing/c_cblas2.c | 130 +++++++++++++++++++-------------------- CBLAS/testing/c_cblas3.c | 82 ++++++++++++------------ CBLAS/testing/c_d2chke.c | 10 +-- CBLAS/testing/c_d3chke.c | 10 +-- CBLAS/testing/c_dblas1.c | 34 +++++----- CBLAS/testing/c_dblas2.c | 104 +++++++++++++++---------------- CBLAS/testing/c_dblas3.c | 48 +++++++-------- CBLAS/testing/c_s2chke.c | 10 +-- CBLAS/testing/c_s3chke.c | 10 +-- CBLAS/testing/c_sblas1.c | 34 +++++----- CBLAS/testing/c_sblas2.c | 104 +++++++++++++++---------------- CBLAS/testing/c_sblas3.c | 48 +++++++-------- CBLAS/testing/c_xerbla.c | 10 +-- CBLAS/testing/c_z2chke.c | 10 +-- CBLAS/testing/c_z3chke.c | 10 +-- CBLAS/testing/c_zblas1.c | 34 +++++----- CBLAS/testing/c_zblas2.c | 130 +++++++++++++++++++-------------------- CBLAS/testing/c_zblas3.c | 82 ++++++++++++------------ 21 files changed, 477 insertions(+), 477 deletions(-) diff --git a/CBLAS/testing/c_c2chke.c b/CBLAS/testing/c_c2chke.c index 33a0261f51..e46bcd1493 100644 --- a/CBLAS/testing/c_c2chke.c +++ b/CBLAS/testing/c_c2chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -32,7 +32,7 @@ void F77_c2chke(char *rout) { ALPHA[2] = {0.0,0.0}, BETA[2] = {0.0,0.0}, RALPHA = 0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_c3chke.c b/CBLAS/testing/c_c3chke.c index 7057c7a2c4..b5bbc753cd 100644 --- a/CBLAS/testing/c_c3chke.c +++ b/CBLAS/testing/c_c3chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -32,7 +32,7 @@ void F77_c3chke(char * rout) { ALPHA[2] = {0.0,0.0}, BETA[2] = {0.0,0.0}, RALPHA = 0.0, RBETA = 0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_cblas1.c b/CBLAS/testing/c_cblas1.c index 81a5b843b5..75b5b73836 100644 --- a/CBLAS/testing/c_cblas1.c +++ b/CBLAS/testing/c_cblas1.c @@ -8,67 +8,67 @@ */ #include "cblas_test.h" #include "cblas.h" -void F77_caxpy(const int *N, const void *alpha, void *X, - const int *incX, void *Y, const int *incY) +void F77_caxpy(const CBLAS_INT *N, const void *alpha, void *X, + const CBLAS_INT *incX, void *Y, const CBLAS_INT *incY) { cblas_caxpy(*N, alpha, X, *incX, Y, *incY); return; } -void F77_ccopy(const int *N, void *X, const int *incX, - void *Y, const int *incY) +void F77_ccopy(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY) { cblas_ccopy(*N, X, *incX, Y, *incY); return; } -void F77_cdotc(const int *N, void *X, const int *incX, - void *Y, const int *incY, void *dotc) +void F77_cdotc(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY, void *dotc) { cblas_cdotc_sub(*N, X, *incX, Y, *incY, dotc); return; } -void F77_cdotu(const int *N, void *X, const int *incX, - void *Y, const int *incY,void *dotu) +void F77_cdotu(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY,void *dotu) { cblas_cdotu_sub(*N, X, *incX, Y, *incY, dotu); return; } -void F77_cscal(const int *N, const void * *alpha, void *X, - const int *incX) +void F77_cscal(const CBLAS_INT *N, const void * *alpha, void *X, + const CBLAS_INT *incX) { cblas_cscal(*N, alpha, X, *incX); return; } -void F77_csscal(const int *N, const float *alpha, void *X, - const int *incX) +void F77_csscal(const CBLAS_INT *N, const float *alpha, void *X, + const CBLAS_INT *incX) { cblas_csscal(*N, *alpha, X, *incX); return; } -void F77_cswap( const int *N, void *X, const int *incX, - void *Y, const int *incY) +void F77_cswap( const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY) { cblas_cswap(*N,X,*incX,Y,*incY); return; } -int F77_icamax(const int *N, const void *X, const int *incX) +CBLAS_INT F77_icamax(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_icamax(*N, X, *incX)+1); } -float F77_scnrm2(const int *N, const void *X, const int *incX) +float F77_scnrm2(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { return cblas_scnrm2(*N, X, *incX); } -float F77_scasum(const int *N, void *X, const int *incX) +float F77_scasum(const CBLAS_INT *N, void *X, const CBLAS_INT *incX) { return cblas_scasum(*N, X, *incX); } diff --git a/CBLAS/testing/c_cblas2.c b/CBLAS/testing/c_cblas2.c index bb7e644854..b4c8734c0d 100644 --- a/CBLAS/testing/c_cblas2.c +++ b/CBLAS/testing/c_cblas2.c @@ -8,13 +8,13 @@ #include "cblas.h" #include "cblas_test.h" -void F77_cgemv(int *layout, char *transp, int *m, int *n, +void F77_cgemv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, const void *alpha, - CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx, - const void *beta, void *y, int *incy) { + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, const void *x, CBLAS_INT *incx, + const void *beta, void *y, CBLAS_INT *incy) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -38,13 +38,13 @@ void F77_cgemv(int *layout, char *transp, int *m, int *n, *m, *n, alpha, a, *lda, x, *incx, beta, y, *incy ); } -void F77_cgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *x, int *incx, - CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, int *incy) { +void F77_cgbmv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, CBLAS_INT *kl, CBLAS_INT *ku, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, + CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy) { CBLAS_TEST_COMPLEX *A; - int i,j,irow,jcol,LDA; + CBLAS_INT i,j,irow,jcol,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -85,12 +85,12 @@ void F77_cgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, *incx, beta, y, *incy ); } -void F77_cgeru(int *layout, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy, - CBLAS_TEST_COMPLEX *a, int *lda){ +void F77_cgeru(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda){ CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -114,11 +114,11 @@ void F77_cgeru(int *layout, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, cblas_cgeru( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda ); } -void F77_cgerc(int *layout, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy, - CBLAS_TEST_COMPLEX *a, int *lda) { +void F77_cgerc(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -142,12 +142,12 @@ void F77_cgerc(int *layout, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, cblas_cgerc( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda ); } -void F77_chemv(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx, CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, int *incy){ +void F77_chemv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx, CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -172,13 +172,13 @@ void F77_chemv(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, beta, y, *incy ); } -void F77_chbmv(int *layout, char *uplow, int *n, int *k, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *y, int *incy){ +void F77_chbmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_COMPLEX *A; -int i,irow,j,jcol,LDA; +CBLAS_INT i,irow,j,jcol,LDA; CBLAS_UPLO uplo; @@ -236,12 +236,12 @@ int i,irow,j,jcol,LDA; beta, y, *incy ); } -void F77_chpmv(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, int *incx, - CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, int *incy){ +void F77_chpmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, + CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_COMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -292,11 +292,11 @@ void F77_chpmv(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, *incy ); } -void F77_ctbmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx) { +void F77_ctbmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -355,12 +355,12 @@ void F77_ctbmv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctbmv(UNDEFINED, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ctbsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx) { +void F77_ctbsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -419,10 +419,10 @@ void F77_ctbsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctbsv(UNDEFINED, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ctpmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, int *incx) { +void F77_ctpmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -474,10 +474,10 @@ void F77_ctpmv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctpmv( UNDEFINED, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_ctpsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, int *incx) { +void F77_ctpsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -529,11 +529,11 @@ void F77_ctpsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctpsv( UNDEFINED, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_ctrmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx) { +void F77_ctrmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -558,11 +558,11 @@ void F77_ctrmv(int *layout, char *uplow, char *transp, char *diagn, else cblas_ctrmv(UNDEFINED, uplo, trans, diag, *n, a, *lda, x, *incx); } -void F77_ctrsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx) { +void F77_ctrsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -588,10 +588,10 @@ void F77_ctrsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctrsv(UNDEFINED, uplo, trans, diag, *n, a, *lda, x, *incx ); } -void F77_chpr(int *layout, char *uplow, int *n, float *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *ap) { +void F77_chpr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *ap) { CBLAS_TEST_COMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -663,11 +663,11 @@ void F77_chpr(int *layout, char *uplow, int *n, float *alpha, cblas_chpr(UNDEFINED, uplo, *n, *alpha, x, *incx, ap ); } -void F77_chpr2(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy, +void F77_chpr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy, CBLAS_TEST_COMPLEX *ap) { CBLAS_TEST_COMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -740,10 +740,10 @@ void F77_chpr2(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, cblas_chpr2( UNDEFINED, uplo, *n, alpha, x, *incx, y, *incy, ap ); } -void F77_cher(int *layout, char *uplow, int *n, float *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *a, int *lda) { +void F77_cher(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -772,12 +772,12 @@ void F77_cher(int *layout, char *uplow, int *n, float *alpha, cblas_cher( UNDEFINED, uplo, *n, *alpha, x, *incx, a, *lda ); } -void F77_cher2(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy, - CBLAS_TEST_COMPLEX *a, int *lda) { +void F77_cher2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); diff --git a/CBLAS/testing/c_cblas3.c b/CBLAS/testing/c_cblas3.c index e0e41230f4..f758dc9ebc 100644 --- a/CBLAS/testing/c_cblas3.c +++ b/CBLAS/testing/c_cblas3.c @@ -11,13 +11,13 @@ #define TEST_ROW_MJR 1 #define UNDEFINED -1 -void F77_cgemm(int *layout, char *transpa, char *transpb, int *m, int *n, - int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_cgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_INT *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_COMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_TRANSPOSE transa, transb; get_transpose_type(transpa, &transa); @@ -87,13 +87,13 @@ void F77_cgemm(int *layout, char *transpa, char *transpb, int *m, int *n, cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_chemm(int *layout, char *rtlf, char *uplow, int *m, int *n, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_chemm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_COMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -151,13 +151,13 @@ void F77_chemm(int *layout, char *rtlf, char *uplow, int *m, int *n, cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_csymm(int *layout, char *rtlf, char *uplow, int *m, int *n, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_csymm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_COMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -206,11 +206,11 @@ void F77_csymm(int *layout, char *rtlf, char *uplow, int *m, int *n, beta, c, *ldc ); } -void F77_cherk(int *layout, char *uplow, char *transp, int *n, int *k, - float *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - float *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_cherk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + float *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + float *beta, CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; CBLAS_TEST_COMPLEX *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -262,11 +262,11 @@ void F77_cherk(int *layout, char *uplow, char *transp, int *n, int *k, c, *ldc ); } -void F77_csyrk(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_csyrk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; CBLAS_TEST_COMPLEX *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -317,11 +317,11 @@ void F77_csyrk(int *layout, char *uplow, char *transp, int *n, int *k, cblas_csyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta, c, *ldc ); } -void F77_cher2k(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, float *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_cher2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, float *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; CBLAS_TEST_COMPLEX *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -381,11 +381,11 @@ void F77_cher2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_cher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_csyr2k(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_csyr2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; CBLAS_TEST_COMPLEX *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -445,10 +445,10 @@ void F77_csyr2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_csyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_ctrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, - int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { - int i,j,LDA,LDB; +void F77_ctrmm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, + CBLAS_INT *lda, CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; CBLAS_TEST_COMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; @@ -504,10 +504,10 @@ void F77_ctrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, a, *lda, b, *ldb); } -void F77_ctrsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, - int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { - int i,j,LDA,LDB; +void F77_ctrsm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, + CBLAS_INT *lda, CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; CBLAS_TEST_COMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; diff --git a/CBLAS/testing/c_d2chke.c b/CBLAS/testing/c_d2chke.c index 73e8aceec5..90d9c3ca70 100644 --- a/CBLAS/testing/c_d2chke.c +++ b/CBLAS/testing/c_d2chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -30,7 +30,7 @@ void F77_d2chke(char *rout) { X[2] = {0.0,0.0}, Y[2] = {0.0,0.0}, ALPHA=0.0, BETA=0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_d3chke.c b/CBLAS/testing/c_d3chke.c index 9a1ba3cc59..c9056c85ce 100644 --- a/CBLAS/testing/c_d3chke.c +++ b/CBLAS/testing/c_d3chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -30,7 +30,7 @@ void F77_d3chke(char *rout) { B[2] = {0.0,0.0}, C[2] = {0.0,0.0}, ALPHA=0.0, BETA=0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_dblas1.c b/CBLAS/testing/c_dblas1.c index deb7851257..cf03549fa8 100644 --- a/CBLAS/testing/c_dblas1.c +++ b/CBLAS/testing/c_dblas1.c @@ -8,32 +8,32 @@ */ #include "cblas_test.h" #include "cblas.h" -double F77_dasum(const int *N, double *X, const int *incX) +double F77_dasum(const CBLAS_INT *N, double *X, const CBLAS_INT *incX) { return cblas_dasum(*N, X, *incX); } -void F77_daxpy(const int *N, const double *alpha, const double *X, - const int *incX, double *Y, const int *incY) +void F77_daxpy(const CBLAS_INT *N, const double *alpha, const double *X, + const CBLAS_INT *incX, double *Y, const CBLAS_INT *incY) { cblas_daxpy(*N, *alpha, X, *incX, Y, *incY); return; } -void F77_dcopy(const int *N, double *X, const int *incX, - double *Y, const int *incY) +void F77_dcopy(const CBLAS_INT *N, double *X, const CBLAS_INT *incX, + double *Y, const CBLAS_INT *incY) { cblas_dcopy(*N, X, *incX, Y, *incY); return; } -double F77_ddot(const int *N, const double *X, const int *incX, - const double *Y, const int *incY) +double F77_ddot(const CBLAS_INT *N, const double *X, const CBLAS_INT *incX, + const double *Y, const CBLAS_INT *incY) { return cblas_ddot(*N, X, *incX, Y, *incY); } -double F77_dnrm2(const int *N, const double *X, const int *incX) +double F77_dnrm2(const CBLAS_INT *N, const double *X, const CBLAS_INT *incX) { return cblas_dnrm2(*N, X, *incX); } @@ -44,39 +44,39 @@ void F77_drotg( double *a, double *b, double *c, double *s) return; } -void F77_drot( const int *N, double *X, const int *incX, double *Y, - const int *incY, const double *c, const double *s) +void F77_drot( const CBLAS_INT *N, double *X, const CBLAS_INT *incX, double *Y, + const CBLAS_INT *incY, const double *c, const double *s) { cblas_drot(*N,X,*incX,Y,*incY,*c,*s); return; } -void F77_dscal(const int *N, const double *alpha, double *X, - const int *incX) +void F77_dscal(const CBLAS_INT *N, const double *alpha, double *X, + const CBLAS_INT *incX) { cblas_dscal(*N, *alpha, X, *incX); return; } -void F77_dswap( const int *N, double *X, const int *incX, - double *Y, const int *incY) +void F77_dswap( const CBLAS_INT *N, double *X, const CBLAS_INT *incX, + double *Y, const CBLAS_INT *incY) { cblas_dswap(*N,X,*incX,Y,*incY); return; } -double F77_dzasum(const int *N, void *X, const int *incX) +double F77_dzasum(const CBLAS_INT *N, void *X, const CBLAS_INT *incX) { return cblas_dzasum(*N, X, *incX); } -double F77_dznrm2(const int *N, const void *X, const int *incX) +double F77_dznrm2(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { return cblas_dznrm2(*N, X, *incX); } -int F77_idamax(const int *N, const double *X, const int *incX) +CBLAS_INT F77_idamax(const CBLAS_INT *N, const double *X, const CBLAS_INT *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_idamax(*N, X, *incX)+1); diff --git a/CBLAS/testing/c_dblas2.c b/CBLAS/testing/c_dblas2.c index 835ba19f34..7a3e278e10 100644 --- a/CBLAS/testing/c_dblas2.c +++ b/CBLAS/testing/c_dblas2.c @@ -8,12 +8,12 @@ #include "cblas.h" #include "cblas_test.h" -void F77_dgemv(int *layout, char *transp, int *m, int *n, double *alpha, - double *a, int *lda, double *x, int *incx, double *beta, - double *y, int *incy ) { +void F77_dgemv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, double *alpha, + double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx, double *beta, + double *y, CBLAS_INT *incy ) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -35,11 +35,11 @@ void F77_dgemv(int *layout, char *transp, int *m, int *n, double *alpha, *m, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy ); } -void F77_dger(int *layout, int *m, int *n, double *alpha, double *x, int *incx, - double *y, int *incy, double *a, int *lda ) { +void F77_dger(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, double *alpha, double *x, CBLAS_INT *incx, + double *y, CBLAS_INT *incy, double *a, CBLAS_INT *lda ) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -60,10 +60,10 @@ void F77_dger(int *layout, int *m, int *n, double *alpha, double *x, int *incx, cblas_dger( CblasColMajor, *m, *n, *alpha, x, *incx, y, *incy, a, *lda ); } -void F77_dtrmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, double *a, int *lda, double *x, int *incx) { +void F77_dtrmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -88,10 +88,10 @@ void F77_dtrmv(int *layout, char *uplow, char *transp, char *diagn, } } -void F77_dtrsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, double *a, int *lda, double *x, int *incx ) { +void F77_dtrsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx ) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -112,11 +112,11 @@ void F77_dtrsv(int *layout, char *uplow, char *transp, char *diagn, else cblas_dtrsv(CblasColMajor, uplo, trans, diag, *n, a, *lda, x, *incx ); } -void F77_dsymv(int *layout, char *uplow, int *n, double *alpha, double *a, - int *lda, double *x, int *incx, double *beta, double *y, - int *incy) { +void F77_dsymv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *a, + CBLAS_INT *lda, double *x, CBLAS_INT *incx, double *beta, double *y, + CBLAS_INT *incy) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -136,10 +136,10 @@ void F77_dsymv(int *layout, char *uplow, int *n, double *alpha, double *a, *beta, y, *incy ); } -void F77_dsyr(int *layout, char *uplow, int *n, double *alpha, double *x, - int *incx, double *a, int *lda) { +void F77_dsyr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *x, + CBLAS_INT *incx, double *a, CBLAS_INT *lda) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -160,10 +160,10 @@ void F77_dsyr(int *layout, char *uplow, int *n, double *alpha, double *x, cblas_dsyr(CblasColMajor, uplo, *n, *alpha, x, *incx, a, *lda); } -void F77_dsyr2(int *layout, char *uplow, int *n, double *alpha, double *x, - int *incx, double *y, int *incy, double *a, int *lda) { +void F77_dsyr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *x, + CBLAS_INT *incx, double *y, CBLAS_INT *incy, double *a, CBLAS_INT *lda) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -184,12 +184,12 @@ void F77_dsyr2(int *layout, char *uplow, int *n, double *alpha, double *x, cblas_dsyr2(CblasColMajor, uplo, *n, *alpha, x, *incx, y, *incy, a, *lda); } -void F77_dgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, - double *alpha, double *a, int *lda, double *x, int *incx, - double *beta, double *y, int *incy ) { +void F77_dgbmv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, CBLAS_INT *kl, CBLAS_INT *ku, + double *alpha, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx, + double *beta, double *y, CBLAS_INT *incy ) { double *A; - int i,irow,j,jcol,LDA; + CBLAS_INT i,irow,j,jcol,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -222,10 +222,10 @@ void F77_dgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, a, *lda, x, *incx, *beta, y, *incy ); } -void F77_dtbmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, double *a, int *lda, double *x, int *incx) { +void F77_dtbmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx) { double *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -268,10 +268,10 @@ void F77_dtbmv(int *layout, char *uplow, char *transp, char *diagn, cblas_dtbmv(CblasColMajor, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_dtbsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, double *a, int *lda, double *x, int *incx) { +void F77_dtbsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx) { double *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -314,11 +314,11 @@ void F77_dtbsv(int *layout, char *uplow, char *transp, char *diagn, cblas_dtbsv(CblasColMajor, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_dsbmv(int *layout, char *uplow, int *n, int *k, double *alpha, - double *a, int *lda, double *x, int *incx, double *beta, - double *y, int *incy) { +void F77_dsbmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_INT *k, double *alpha, + double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx, double *beta, + double *y, CBLAS_INT *incy) { double *A; - int i,j,irow,jcol,LDA; + CBLAS_INT i,j,irow,jcol,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -359,10 +359,10 @@ void F77_dsbmv(int *layout, char *uplow, int *n, int *k, double *alpha, *beta, y, *incy ); } -void F77_dspmv(int *layout, char *uplow, int *n, double *alpha, double *ap, - double *x, int *incx, double *beta, double *y, int *incy) { +void F77_dspmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *ap, + double *x, CBLAS_INT *incx, double *beta, double *y, CBLAS_INT *incy) { double *A,*AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -397,10 +397,10 @@ void F77_dspmv(int *layout, char *uplow, int *n, double *alpha, double *ap, *incy ); } -void F77_dtpmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, double *ap, double *x, int *incx) { +void F77_dtpmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, double *ap, double *x, CBLAS_INT *incx) { double *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -437,10 +437,10 @@ void F77_dtpmv(int *layout, char *uplow, char *transp, char *diagn, cblas_dtpmv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_dtpsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, double *ap, double *x, int *incx) { +void F77_dtpsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, double *ap, double *x, CBLAS_INT *incx) { double *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -478,10 +478,10 @@ void F77_dtpsv(int *layout, char *uplow, char *transp, char *diagn, cblas_dtpsv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_dspr(int *layout, char *uplow, int *n, double *alpha, double *x, - int *incx, double *ap ){ +void F77_dspr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *x, + CBLAS_INT *incx, double *ap ){ double *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -530,10 +530,10 @@ void F77_dspr(int *layout, char *uplow, int *n, double *alpha, double *x, cblas_dspr( CblasColMajor, uplo, *n, *alpha, x, *incx, ap ); } -void F77_dspr2(int *layout, char *uplow, int *n, double *alpha, double *x, - int *incx, double *y, int *incy, double *ap ){ +void F77_dspr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *x, + CBLAS_INT *incx, double *y, CBLAS_INT *incy, double *ap ){ double *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); diff --git a/CBLAS/testing/c_dblas3.c b/CBLAS/testing/c_dblas3.c index 8a2c37688d..49c5a698fd 100644 --- a/CBLAS/testing/c_dblas3.c +++ b/CBLAS/testing/c_dblas3.c @@ -11,12 +11,12 @@ #define TEST_ROW_MJR 1 #define UNDEFINED -1 -void F77_dgemm(int *layout, char *transpa, char *transpb, int *m, int *n, - int *k, double *alpha, double *a, int *lda, double *b, int *ldb, - double *beta, double *c, int *ldc ) { +void F77_dgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_INT *k, double *alpha, double *a, CBLAS_INT *lda, double *b, CBLAS_INT *ldb, + double *beta, double *c, CBLAS_INT *ldc ) { double *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_TRANSPOSE transa, transb; get_transpose_type(transpa, &transa); @@ -73,12 +73,12 @@ void F77_dgemm(int *layout, char *transpa, char *transpb, int *m, int *n, cblas_dgemm( UNDEFINED, transa, transb, *m, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_dsymm(int *layout, char *rtlf, char *uplow, int *m, int *n, - double *alpha, double *a, int *lda, double *b, int *ldb, - double *beta, double *c, int *ldc ) { +void F77_dsymm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + double *alpha, double *a, CBLAS_INT *lda, double *b, CBLAS_INT *ldb, + double *beta, double *c, CBLAS_INT *ldc ) { double *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -127,11 +127,11 @@ void F77_dsymm(int *layout, char *rtlf, char *uplow, int *m, int *n, *beta, c, *ldc ); } -void F77_dsyrk(int *layout, char *uplow, char *transp, int *n, int *k, - double *alpha, double *a, int *lda, - double *beta, double *c, int *ldc ) { +void F77_dsyrk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + double *alpha, double *a, CBLAS_INT *lda, + double *beta, double *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; double *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -175,10 +175,10 @@ void F77_dsyrk(int *layout, char *uplow, char *transp, int *n, int *k, c, *ldc ); } -void F77_dsyr2k(int *layout, char *uplow, char *transp, int *n, int *k, - double *alpha, double *a, int *lda, double *b, int *ldb, - double *beta, double *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_dsyr2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + double *alpha, double *a, CBLAS_INT *lda, double *b, CBLAS_INT *ldb, + double *beta, double *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; double *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -230,10 +230,10 @@ void F77_dsyr2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_dsyr2k(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_dtrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, double *alpha, double *a, int *lda, double *b, - int *ldb) { - int i,j,LDA,LDB; +void F77_dtrmm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, double *alpha, double *a, CBLAS_INT *lda, double *b, + CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; double *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; @@ -281,10 +281,10 @@ void F77_dtrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, a, *lda, b, *ldb); } -void F77_dtrsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, double *alpha, double *a, int *lda, double *b, - int *ldb) { - int i,j,LDA,LDB; +void F77_dtrsm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, double *alpha, double *a, CBLAS_INT *lda, double *b, + CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; double *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; diff --git a/CBLAS/testing/c_s2chke.c b/CBLAS/testing/c_s2chke.c index a781bd505f..adb09a6049 100644 --- a/CBLAS/testing/c_s2chke.c +++ b/CBLAS/testing/c_s2chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -30,7 +30,7 @@ void F77_s2chke(char *rout) { X[2] = {0.0,0.0}, Y[2] = {0.0,0.0}, ALPHA=0.0, BETA=0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_s3chke.c b/CBLAS/testing/c_s3chke.c index 425d6a7023..f95277e9c8 100644 --- a/CBLAS/testing/c_s3chke.c +++ b/CBLAS/testing/c_s3chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -30,7 +30,7 @@ void F77_s3chke(char *rout) { B[2] = {0.0,0.0}, C[2] = {0.0,0.0}, ALPHA=0.0, BETA=0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_sblas1.c b/CBLAS/testing/c_sblas1.c index 2e63d98148..e5a88766ff 100644 --- a/CBLAS/testing/c_sblas1.c +++ b/CBLAS/testing/c_sblas1.c @@ -8,42 +8,42 @@ */ #include "cblas_test.h" #include "cblas.h" -float F77_sasum(const int *N, float *X, const int *incX) +float F77_sasum(const CBLAS_INT *N, float *X, const CBLAS_INT *incX) { return cblas_sasum(*N, X, *incX); } -void F77_saxpy(const int *N, const float *alpha, const float *X, - const int *incX, float *Y, const int *incY) +void F77_saxpy(const CBLAS_INT *N, const float *alpha, const float *X, + const CBLAS_INT *incX, float *Y, const CBLAS_INT *incY) { cblas_saxpy(*N, *alpha, X, *incX, Y, *incY); return; } -float F77_scasum(const int *N, void *X, const int *incX) +float F77_scasum(const CBLAS_INT *N, void *X, const CBLAS_INT *incX) { return cblas_scasum(*N, X, *incX); } -float F77_scnrm2(const int *N, const void *X, const int *incX) +float F77_scnrm2(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { return cblas_scnrm2(*N, X, *incX); } -void F77_scopy(const int *N, const float *X, const int *incX, - float *Y, const int *incY) +void F77_scopy(const CBLAS_INT *N, const float *X, const CBLAS_INT *incX, + float *Y, const CBLAS_INT *incY) { cblas_scopy(*N, X, *incX, Y, *incY); return; } -float F77_sdot(const int *N, const float *X, const int *incX, - const float *Y, const int *incY) +float F77_sdot(const CBLAS_INT *N, const float *X, const CBLAS_INT *incX, + const float *Y, const CBLAS_INT *incY) { return cblas_sdot(*N, X, *incX, Y, *incY); } -float F77_snrm2(const int *N, const float *X, const int *incX) +float F77_snrm2(const CBLAS_INT *N, const float *X, const CBLAS_INT *incX) { return cblas_snrm2(*N, X, *incX); } @@ -54,28 +54,28 @@ void F77_srotg( float *a, float *b, float *c, float *s) return; } -void F77_srot( const int *N, float *X, const int *incX, float *Y, - const int *incY, const float *c, const float *s) +void F77_srot( const CBLAS_INT *N, float *X, const CBLAS_INT *incX, float *Y, + const CBLAS_INT *incY, const float *c, const float *s) { cblas_srot(*N,X,*incX,Y,*incY,*c,*s); return; } -void F77_sscal(const int *N, const float *alpha, float *X, - const int *incX) +void F77_sscal(const CBLAS_INT *N, const float *alpha, float *X, + const CBLAS_INT *incX) { cblas_sscal(*N, *alpha, X, *incX); return; } -void F77_sswap( const int *N, float *X, const int *incX, - float *Y, const int *incY) +void F77_sswap( const CBLAS_INT *N, float *X, const CBLAS_INT *incX, + float *Y, const CBLAS_INT *incY) { cblas_sswap(*N,X,*incX,Y,*incY); return; } -int F77_isamax(const int *N, const float *X, const int *incX) +CBLAS_INT F77_isamax(const CBLAS_INT *N, const float *X, const CBLAS_INT *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_isamax(*N, X, *incX)+1); diff --git a/CBLAS/testing/c_sblas2.c b/CBLAS/testing/c_sblas2.c index f119504872..00bb4ca13e 100644 --- a/CBLAS/testing/c_sblas2.c +++ b/CBLAS/testing/c_sblas2.c @@ -8,12 +8,12 @@ #include "cblas.h" #include "cblas_test.h" -void F77_sgemv(int *layout, char *transp, int *m, int *n, float *alpha, - float *a, int *lda, float *x, int *incx, float *beta, - float *y, int *incy ) { +void F77_sgemv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, float *alpha, + float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx, float *beta, + float *y, CBLAS_INT *incy ) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -35,11 +35,11 @@ void F77_sgemv(int *layout, char *transp, int *m, int *n, float *alpha, *m, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy ); } -void F77_sger(int *layout, int *m, int *n, float *alpha, float *x, int *incx, - float *y, int *incy, float *a, int *lda ) { +void F77_sger(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, float *alpha, float *x, CBLAS_INT *incx, + float *y, CBLAS_INT *incy, float *a, CBLAS_INT *lda ) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -60,10 +60,10 @@ void F77_sger(int *layout, int *m, int *n, float *alpha, float *x, int *incx, cblas_sger( CblasColMajor, *m, *n, *alpha, x, *incx, y, *incy, a, *lda ); } -void F77_strmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, float *a, int *lda, float *x, int *incx) { +void F77_strmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -88,10 +88,10 @@ void F77_strmv(int *layout, char *uplow, char *transp, char *diagn, } } -void F77_strsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, float *a, int *lda, float *x, int *incx ) { +void F77_strsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx ) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -112,11 +112,11 @@ void F77_strsv(int *layout, char *uplow, char *transp, char *diagn, else cblas_strsv(CblasColMajor, uplo, trans, diag, *n, a, *lda, x, *incx ); } -void F77_ssymv(int *layout, char *uplow, int *n, float *alpha, float *a, - int *lda, float *x, int *incx, float *beta, float *y, - int *incy) { +void F77_ssymv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *a, + CBLAS_INT *lda, float *x, CBLAS_INT *incx, float *beta, float *y, + CBLAS_INT *incy) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -136,10 +136,10 @@ void F77_ssymv(int *layout, char *uplow, int *n, float *alpha, float *a, *beta, y, *incy ); } -void F77_ssyr(int *layout, char *uplow, int *n, float *alpha, float *x, - int *incx, float *a, int *lda) { +void F77_ssyr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *x, + CBLAS_INT *incx, float *a, CBLAS_INT *lda) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -160,10 +160,10 @@ void F77_ssyr(int *layout, char *uplow, int *n, float *alpha, float *x, cblas_ssyr(CblasColMajor, uplo, *n, *alpha, x, *incx, a, *lda); } -void F77_ssyr2(int *layout, char *uplow, int *n, float *alpha, float *x, - int *incx, float *y, int *incy, float *a, int *lda) { +void F77_ssyr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *x, + CBLAS_INT *incx, float *y, CBLAS_INT *incy, float *a, CBLAS_INT *lda) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -184,12 +184,12 @@ void F77_ssyr2(int *layout, char *uplow, int *n, float *alpha, float *x, cblas_ssyr2(CblasColMajor, uplo, *n, *alpha, x, *incx, y, *incy, a, *lda); } -void F77_sgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, - float *alpha, float *a, int *lda, float *x, int *incx, - float *beta, float *y, int *incy ) { +void F77_sgbmv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, CBLAS_INT *kl, CBLAS_INT *ku, + float *alpha, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx, + float *beta, float *y, CBLAS_INT *incy ) { float *A; - int i,irow,j,jcol,LDA; + CBLAS_INT i,irow,j,jcol,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -222,10 +222,10 @@ void F77_sgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, a, *lda, x, *incx, *beta, y, *incy ); } -void F77_stbmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, float *a, int *lda, float *x, int *incx) { +void F77_stbmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx) { float *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -268,10 +268,10 @@ void F77_stbmv(int *layout, char *uplow, char *transp, char *diagn, cblas_stbmv(CblasColMajor, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_stbsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, float *a, int *lda, float *x, int *incx) { +void F77_stbsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx) { float *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -314,11 +314,11 @@ void F77_stbsv(int *layout, char *uplow, char *transp, char *diagn, cblas_stbsv(CblasColMajor, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ssbmv(int *layout, char *uplow, int *n, int *k, float *alpha, - float *a, int *lda, float *x, int *incx, float *beta, - float *y, int *incy) { +void F77_ssbmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_INT *k, float *alpha, + float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx, float *beta, + float *y, CBLAS_INT *incy) { float *A; - int i,j,irow,jcol,LDA; + CBLAS_INT i,j,irow,jcol,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -359,10 +359,10 @@ void F77_ssbmv(int *layout, char *uplow, int *n, int *k, float *alpha, *beta, y, *incy ); } -void F77_sspmv(int *layout, char *uplow, int *n, float *alpha, float *ap, - float *x, int *incx, float *beta, float *y, int *incy) { +void F77_sspmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *ap, + float *x, CBLAS_INT *incx, float *beta, float *y, CBLAS_INT *incy) { float *A,*AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -396,10 +396,10 @@ void F77_sspmv(int *layout, char *uplow, int *n, float *alpha, float *ap, *incy ); } -void F77_stpmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, float *ap, float *x, int *incx) { +void F77_stpmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, float *ap, float *x, CBLAS_INT *incx) { float *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -435,10 +435,10 @@ void F77_stpmv(int *layout, char *uplow, char *transp, char *diagn, cblas_stpmv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_stpsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, float *ap, float *x, int *incx) { +void F77_stpsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, float *ap, float *x, CBLAS_INT *incx) { float *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -475,10 +475,10 @@ void F77_stpsv(int *layout, char *uplow, char *transp, char *diagn, cblas_stpsv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_sspr(int *layout, char *uplow, int *n, float *alpha, float *x, - int *incx, float *ap ){ +void F77_sspr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *x, + CBLAS_INT *incx, float *ap ){ float *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -526,10 +526,10 @@ void F77_sspr(int *layout, char *uplow, int *n, float *alpha, float *x, cblas_sspr( CblasColMajor, uplo, *n, *alpha, x, *incx, ap ); } -void F77_sspr2(int *layout, char *uplow, int *n, float *alpha, float *x, - int *incx, float *y, int *incy, float *ap ){ +void F77_sspr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *x, + CBLAS_INT *incx, float *y, CBLAS_INT *incy, float *ap ){ float *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); diff --git a/CBLAS/testing/c_sblas3.c b/CBLAS/testing/c_sblas3.c index 024fc474fd..0621b293d5 100644 --- a/CBLAS/testing/c_sblas3.c +++ b/CBLAS/testing/c_sblas3.c @@ -9,12 +9,12 @@ #include "cblas.h" #include "cblas_test.h" -void F77_sgemm(int *layout, char *transpa, char *transpb, int *m, int *n, - int *k, float *alpha, float *a, int *lda, float *b, int *ldb, - float *beta, float *c, int *ldc ) { +void F77_sgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_INT *k, float *alpha, float *a, CBLAS_INT *lda, float *b, CBLAS_INT *ldb, + float *beta, float *c, CBLAS_INT *ldc ) { float *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_TRANSPOSE transa, transb; get_transpose_type(transpa, &transa); @@ -70,12 +70,12 @@ void F77_sgemm(int *layout, char *transpa, char *transpb, int *m, int *n, cblas_sgemm( UNDEFINED, transa, transb, *m, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_ssymm(int *layout, char *rtlf, char *uplow, int *m, int *n, - float *alpha, float *a, int *lda, float *b, int *ldb, - float *beta, float *c, int *ldc ) { +void F77_ssymm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + float *alpha, float *a, CBLAS_INT *lda, float *b, CBLAS_INT *ldb, + float *beta, float *c, CBLAS_INT *ldc ) { float *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -124,11 +124,11 @@ void F77_ssymm(int *layout, char *rtlf, char *uplow, int *m, int *n, *beta, c, *ldc ); } -void F77_ssyrk(int *layout, char *uplow, char *transp, int *n, int *k, - float *alpha, float *a, int *lda, - float *beta, float *c, int *ldc ) { +void F77_ssyrk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + float *alpha, float *a, CBLAS_INT *lda, + float *beta, float *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; float *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -172,10 +172,10 @@ void F77_ssyrk(int *layout, char *uplow, char *transp, int *n, int *k, c, *ldc ); } -void F77_ssyr2k(int *layout, char *uplow, char *transp, int *n, int *k, - float *alpha, float *a, int *lda, float *b, int *ldb, - float *beta, float *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_ssyr2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + float *alpha, float *a, CBLAS_INT *lda, float *b, CBLAS_INT *ldb, + float *beta, float *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; float *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -227,10 +227,10 @@ void F77_ssyr2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_ssyr2k(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_strmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, float *alpha, float *a, int *lda, float *b, - int *ldb) { - int i,j,LDA,LDB; +void F77_strmm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, float *alpha, float *a, CBLAS_INT *lda, float *b, + CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; float *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; @@ -278,10 +278,10 @@ void F77_strmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, a, *lda, b, *ldb); } -void F77_strsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, float *alpha, float *a, int *lda, float *b, - int *ldb) { - int i,j,LDA,LDB; +void F77_strsm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, float *alpha, float *a, CBLAS_INT *lda, float *b, + CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; float *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; diff --git a/CBLAS/testing/c_xerbla.c b/CBLAS/testing/c_xerbla.c index c3f14c364a..57d61ee1fd 100644 --- a/CBLAS/testing/c_xerbla.c +++ b/CBLAS/testing/c_xerbla.c @@ -7,8 +7,8 @@ void cblas_xerbla(CBLAS_INT info, const char *rout, const char *form, ...) { - extern int cblas_lerr, cblas_info, cblas_ok; - extern int link_xerbla; + extern CBLAS_INT cblas_lerr, cblas_info, cblas_ok; + extern CBLAS_INT link_xerbla; extern int RowMajorStrg; extern char *cblas_rout; @@ -101,9 +101,9 @@ void F77_xerbla(char *srname, void *vinfo) F77_Integer i; extern F77_Integer link_xerbla; #else - int *info=vinfo; - int i; - extern int link_xerbla; + CBLAS_INT *info=vinfo; + CBLAS_INT i; + extern CBLAS_INT link_xerbla; #endif #ifdef F77_Char srname = F2C_STR(F77_srname, XerblaStrLen); diff --git a/CBLAS/testing/c_z2chke.c b/CBLAS/testing/c_z2chke.c index 65e552da3e..23f6896761 100644 --- a/CBLAS/testing/c_z2chke.c +++ b/CBLAS/testing/c_z2chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -32,7 +32,7 @@ void F77_z2chke(char *rout) { ALPHA[2] = {0.0,0.0}, BETA[2] = {0.0,0.0}, RALPHA = 0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_z3chke.c b/CBLAS/testing/c_z3chke.c index 30840489af..d114e8f995 100644 --- a/CBLAS/testing/c_z3chke.c +++ b/CBLAS/testing/c_z3chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -32,7 +32,7 @@ void F77_z3chke(char * rout) { ALPHA[2] = {0.0,0.0}, BETA[2] = {0.0,0.0}, RALPHA = 0.0, RBETA = 0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_zblas1.c b/CBLAS/testing/c_zblas1.c index 2b21d8f187..698397db4b 100644 --- a/CBLAS/testing/c_zblas1.c +++ b/CBLAS/testing/c_zblas1.c @@ -8,67 +8,67 @@ */ #include "cblas_test.h" #include "cblas.h" -void F77_zaxpy(const int *N, const void *alpha, void *X, - const int *incX, void *Y, const int *incY) +void F77_zaxpy(const CBLAS_INT *N, const void *alpha, void *X, + const CBLAS_INT *incX, void *Y, const CBLAS_INT *incY) { cblas_zaxpy(*N, alpha, X, *incX, Y, *incY); return; } -void F77_zcopy(const int *N, void *X, const int *incX, - void *Y, const int *incY) +void F77_zcopy(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY) { cblas_zcopy(*N, X, *incX, Y, *incY); return; } -void F77_zdotc(const int *N, const void *X, const int *incX, - const void *Y, const int *incY,void *dotc) +void F77_zdotc(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX, + const void *Y, const CBLAS_INT *incY,void *dotc) { cblas_zdotc_sub(*N, X, *incX, Y, *incY, dotc); return; } -void F77_zdotu(const int *N, void *X, const int *incX, - void *Y, const int *incY,void *dotu) +void F77_zdotu(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY,void *dotu) { cblas_zdotu_sub(*N, X, *incX, Y, *incY, dotu); return; } -void F77_zdscal(const int *N, const double *alpha, void *X, - const int *incX) +void F77_zdscal(const CBLAS_INT *N, const double *alpha, void *X, + const CBLAS_INT *incX) { cblas_zdscal(*N, *alpha, X, *incX); return; } -void F77_zscal(const int *N, const void * *alpha, void *X, - const int *incX) +void F77_zscal(const CBLAS_INT *N, const void * *alpha, void *X, + const CBLAS_INT *incX) { cblas_zscal(*N, alpha, X, *incX); return; } -void F77_zswap( const int *N, void *X, const int *incX, - void *Y, const int *incY) +void F77_zswap( const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY) { cblas_zswap(*N,X,*incX,Y,*incY); return; } -int F77_izamax(const int *N, const void *X, const int *incX) +CBLAS_INT F77_izamax(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { if (*N < 1 || *incX < 1) return(0); return(cblas_izamax(*N, X, *incX)+1); } -double F77_dznrm2(const int *N, const void *X, const int *incX) +double F77_dznrm2(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { return cblas_dznrm2(*N, X, *incX); } -double F77_dzasum(const int *N, void *X, const int *incX) +double F77_dzasum(const CBLAS_INT *N, void *X, const CBLAS_INT *incX) { return cblas_dzasum(*N, X, *incX); } diff --git a/CBLAS/testing/c_zblas2.c b/CBLAS/testing/c_zblas2.c index b6fbdd628d..b70f9ce5e0 100644 --- a/CBLAS/testing/c_zblas2.c +++ b/CBLAS/testing/c_zblas2.c @@ -8,13 +8,13 @@ #include "cblas.h" #include "cblas_test.h" -void F77_zgemv(int *layout, char *transp, int *m, int *n, +void F77_zgemv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, const void *alpha, - CBLAS_TEST_ZOMPLEX *a, int *lda, const void *x, int *incx, - const void *beta, void *y, int *incy) { + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, const void *x, CBLAS_INT *incx, + const void *beta, void *y, CBLAS_INT *incy) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -38,13 +38,13 @@ void F77_zgemv(int *layout, char *transp, int *m, int *n, *m, *n, alpha, a, *lda, x, *incx, beta, y, *incy ); } -void F77_zgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *x, int *incx, - CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, int *incy) { +void F77_zgbmv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, CBLAS_INT *kl, CBLAS_INT *ku, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, + CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy) { CBLAS_TEST_ZOMPLEX *A; - int i,j,irow,jcol,LDA; + CBLAS_INT i,j,irow,jcol,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -85,12 +85,12 @@ void F77_zgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, *incx, beta, y, *incy ); } -void F77_zgeru(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy, - CBLAS_TEST_ZOMPLEX *a, int *lda){ +void F77_zgeru(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda){ CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -114,11 +114,11 @@ void F77_zgeru(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, cblas_zgeru( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda ); } -void F77_zgerc(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy, - CBLAS_TEST_ZOMPLEX *a, int *lda) { +void F77_zgerc(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -142,12 +142,12 @@ void F77_zgerc(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, cblas_zgerc( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda ); } -void F77_zhemv(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx, CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, int *incy){ +void F77_zhemv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -172,13 +172,13 @@ void F77_zhemv(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, beta, y, *incy ); } -void F77_zhbmv(int *layout, char *uplow, int *n, int *k, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *y, int *incy){ +void F77_zhbmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_ZOMPLEX *A; -int i,irow,j,jcol,LDA; +CBLAS_INT i,irow,j,jcol,LDA; CBLAS_UPLO uplo; @@ -236,12 +236,12 @@ int i,irow,j,jcol,LDA; beta, y, *incy ); } -void F77_zhpmv(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, int *incx, - CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, int *incy){ +void F77_zhpmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, + CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_ZOMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -292,11 +292,11 @@ void F77_zhpmv(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, *incy ); } -void F77_ztbmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx) { +void F77_ztbmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -355,12 +355,12 @@ void F77_ztbmv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztbmv(UNDEFINED, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ztbsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx) { +void F77_ztbsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -419,10 +419,10 @@ void F77_ztbsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztbsv(UNDEFINED, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ztpmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, int *incx) { +void F77_ztpmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -474,10 +474,10 @@ void F77_ztpmv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztpmv( UNDEFINED, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_ztpsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, int *incx) { +void F77_ztpsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -529,11 +529,11 @@ void F77_ztpsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztpsv( UNDEFINED, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_ztrmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx) { +void F77_ztrmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -558,11 +558,11 @@ void F77_ztrmv(int *layout, char *uplow, char *transp, char *diagn, else cblas_ztrmv(UNDEFINED, uplo, trans, diag, *n, a, *lda, x, *incx); } -void F77_ztrsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx) { +void F77_ztrsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -588,10 +588,10 @@ void F77_ztrsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztrsv(UNDEFINED, uplo, trans, diag, *n, a, *lda, x, *incx ); } -void F77_zhpr(int *layout, char *uplow, int *n, double *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *ap) { +void F77_zhpr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *ap) { CBLAS_TEST_ZOMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -663,11 +663,11 @@ void F77_zhpr(int *layout, char *uplow, int *n, double *alpha, cblas_zhpr(UNDEFINED, uplo, *n, *alpha, x, *incx, ap ); } -void F77_zhpr2(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy, +void F77_zhpr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy, CBLAS_TEST_ZOMPLEX *ap) { CBLAS_TEST_ZOMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -740,10 +740,10 @@ void F77_zhpr2(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, cblas_zhpr2( UNDEFINED, uplo, *n, alpha, x, *incx, y, *incy, ap ); } -void F77_zher(int *layout, char *uplow, int *n, double *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *a, int *lda) { +void F77_zher(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -772,12 +772,12 @@ void F77_zher(int *layout, char *uplow, int *n, double *alpha, cblas_zher( UNDEFINED, uplo, *n, *alpha, x, *incx, a, *lda ); } -void F77_zher2(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy, - CBLAS_TEST_ZOMPLEX *a, int *lda) { +void F77_zher2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); diff --git a/CBLAS/testing/c_zblas3.c b/CBLAS/testing/c_zblas3.c index 65a821359c..891c70a83d 100644 --- a/CBLAS/testing/c_zblas3.c +++ b/CBLAS/testing/c_zblas3.c @@ -11,13 +11,13 @@ #define TEST_ROW_MJR 1 #define UNDEFINED -1 -void F77_zgemm(int *layout, char *transpa, char *transpb, int *m, int *n, - int *k, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_INT *k, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_ZOMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_TRANSPOSE transa, transb; get_transpose_type(transpa, &transa); @@ -87,13 +87,13 @@ void F77_zgemm(int *layout, char *transpa, char *transpb, int *m, int *n, cblas_zgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_zhemm(int *layout, char *rtlf, char *uplow, int *m, int *n, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zhemm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_ZOMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -151,13 +151,13 @@ void F77_zhemm(int *layout, char *rtlf, char *uplow, int *m, int *n, cblas_zhemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_zsymm(int *layout, char *rtlf, char *uplow, int *m, int *n, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zsymm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_ZOMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -206,11 +206,11 @@ void F77_zsymm(int *layout, char *rtlf, char *uplow, int *m, int *n, beta, c, *ldc ); } -void F77_zherk(int *layout, char *uplow, char *transp, int *n, int *k, - double *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - double *beta, CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zherk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + double *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + double *beta, CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; CBLAS_TEST_ZOMPLEX *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -262,11 +262,11 @@ void F77_zherk(int *layout, char *uplow, char *transp, int *n, int *k, c, *ldc ); } -void F77_zsyrk(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zsyrk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; CBLAS_TEST_ZOMPLEX *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -317,11 +317,11 @@ void F77_zsyrk(int *layout, char *uplow, char *transp, int *n, int *k, cblas_zsyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta, c, *ldc ); } -void F77_zher2k(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, double *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_zher2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, double *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; CBLAS_TEST_ZOMPLEX *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -381,11 +381,11 @@ void F77_zher2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_zher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_zsyr2k(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_zsyr2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; CBLAS_TEST_ZOMPLEX *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -445,10 +445,10 @@ void F77_zsyr2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_zsyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_ztrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, - int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { - int i,j,LDA,LDB; +void F77_ztrmm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, + CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; CBLAS_TEST_ZOMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; @@ -504,10 +504,10 @@ void F77_ztrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, a, *lda, b, *ldb); } -void F77_ztrsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, - int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { - int i,j,LDA,LDB; +void F77_ztrsm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, + CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; CBLAS_TEST_ZOMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; From 6ae0fbe7aaec440323f129fb3146445468dc1fa0 Mon Sep 17 00:00:00 2001 From: Alexandr Kobotov Date: Tue, 29 Nov 2022 18:55:08 +0700 Subject: [PATCH 78/90] Fix uninitialized M when quick return in DLARRD and SLARRD --- SRC/dlarrd.f | 6 +----- SRC/slarrd.f | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/SRC/dlarrd.f b/SRC/dlarrd.f index 08dfd02c30..ea1896adf2 100644 --- a/SRC/dlarrd.f +++ b/SRC/dlarrd.f @@ -381,6 +381,7 @@ SUBROUTINE DLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, * .. Executable Statements .. * INFO = 0 + M = 0 * * Quick return if possible * @@ -424,14 +425,9 @@ SUBROUTINE DLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, END IF * Initialize error flags - INFO = 0 NCNVRG = .FALSE. TOOFEW = .FALSE. -* Quick return if possible - M = 0 - IF( N.EQ.0 ) RETURN - * Simplification: IF( IRANGE.EQ.INDRNG .AND. IL.EQ.1 .AND. IU.EQ.N ) IRANGE = 1 diff --git a/SRC/slarrd.f b/SRC/slarrd.f index 7df8e95fca..21405baa6a 100644 --- a/SRC/slarrd.f +++ b/SRC/slarrd.f @@ -381,6 +381,7 @@ SUBROUTINE SLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, * .. Executable Statements .. * INFO = 0 + M = 0 * * Quick return if possible * @@ -424,14 +425,9 @@ SUBROUTINE SLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, END IF * Initialize error flags - INFO = 0 NCNVRG = .FALSE. TOOFEW = .FALSE. -* Quick return if possible - M = 0 - IF( N.EQ.0 ) RETURN - * Simplification: IF( IRANGE.EQ.INDRNG .AND. IL.EQ.1 .AND. IU.EQ.N ) IRANGE = 1 From 01fb2060b561890a356dd805cb8bfdf533e9c9a3 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Thu, 1 Dec 2022 15:21:57 -0700 Subject: [PATCH 79/90] In LAPACKE tgsen, allocate iwork when ijob = 0. Fixes #772. --- LAPACKE/src/lapacke_ctgsen.c | 14 +++++--------- LAPACKE/src/lapacke_dtgsen.c | 14 +++++--------- LAPACKE/src/lapacke_stgsen.c | 14 +++++--------- LAPACKE/src/lapacke_ztgsen.c | 14 +++++--------- 4 files changed, 20 insertions(+), 36 deletions(-) diff --git a/LAPACKE/src/lapacke_ctgsen.c b/LAPACKE/src/lapacke_ctgsen.c index c543cd44f9..b69c08bb44 100644 --- a/LAPACKE/src/lapacke_ctgsen.c +++ b/LAPACKE/src/lapacke_ctgsen.c @@ -86,12 +86,10 @@ lapack_int LAPACKE_ctgsen( int matrix_layout, lapack_int ijob, liwork = iwork_query; lwork = LAPACK_C2INT( work_query ); /* Allocate memory for work arrays */ - if( ijob != 0 ) { - iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if( iwork == NULL ) { - info = LAPACK_WORK_MEMORY_ERROR; - goto exit_level_0; - } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; } work = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); @@ -106,9 +104,7 @@ lapack_int LAPACKE_ctgsen( int matrix_layout, lapack_int ijob, /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: - if( ijob != 0 ) { - LAPACKE_free( iwork ); - } + LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_ctgsen", info ); diff --git a/LAPACKE/src/lapacke_dtgsen.c b/LAPACKE/src/lapacke_dtgsen.c index 11496c1c0f..883a795798 100644 --- a/LAPACKE/src/lapacke_dtgsen.c +++ b/LAPACKE/src/lapacke_dtgsen.c @@ -83,12 +83,10 @@ lapack_int LAPACKE_dtgsen( int matrix_layout, lapack_int ijob, liwork = iwork_query; lwork = (lapack_int)work_query; /* Allocate memory for work arrays */ - if( ijob != 0 ) { - iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if( iwork == NULL ) { - info = LAPACK_WORK_MEMORY_ERROR; - goto exit_level_0; - } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; } work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); if( work == NULL ) { @@ -103,9 +101,7 @@ lapack_int LAPACKE_dtgsen( int matrix_layout, lapack_int ijob, /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: - if( ijob != 0 ) { - LAPACKE_free( iwork ); - } + LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_dtgsen", info ); diff --git a/LAPACKE/src/lapacke_stgsen.c b/LAPACKE/src/lapacke_stgsen.c index c3b0c4bf82..db5b7e91c1 100644 --- a/LAPACKE/src/lapacke_stgsen.c +++ b/LAPACKE/src/lapacke_stgsen.c @@ -83,12 +83,10 @@ lapack_int LAPACKE_stgsen( int matrix_layout, lapack_int ijob, liwork = iwork_query; lwork = (lapack_int)work_query; /* Allocate memory for work arrays */ - if( ijob != 0 ) { - iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if( iwork == NULL ) { - info = LAPACK_WORK_MEMORY_ERROR; - goto exit_level_0; - } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; } work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); if( work == NULL ) { @@ -103,9 +101,7 @@ lapack_int LAPACKE_stgsen( int matrix_layout, lapack_int ijob, /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: - if( ijob != 0 ) { - LAPACKE_free( iwork ); - } + LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_stgsen", info ); diff --git a/LAPACKE/src/lapacke_ztgsen.c b/LAPACKE/src/lapacke_ztgsen.c index 8c86d5e00e..039da18b48 100644 --- a/LAPACKE/src/lapacke_ztgsen.c +++ b/LAPACKE/src/lapacke_ztgsen.c @@ -86,12 +86,10 @@ lapack_int LAPACKE_ztgsen( int matrix_layout, lapack_int ijob, liwork = iwork_query; lwork = LAPACK_Z2INT( work_query ); /* Allocate memory for work arrays */ - if( ijob != 0 ) { - iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if( iwork == NULL ) { - info = LAPACK_WORK_MEMORY_ERROR; - goto exit_level_0; - } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; } work = (lapack_complex_double*) LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); @@ -106,9 +104,7 @@ lapack_int LAPACKE_ztgsen( int matrix_layout, lapack_int ijob, /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: - if( ijob != 0 ) { - LAPACKE_free( iwork ); - } + LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_ztgsen", info ); From 9a616e191c3d8b27fd593c0d13658d866370a053 Mon Sep 17 00:00:00 2001 From: "Chereshnev, Eugene" Date: Mon, 5 Dec 2022 09:46:47 -0800 Subject: [PATCH 80/90] Fix uninitialized out vars in *LARR* functions --- SRC/dlarra.f | 2 +- SRC/dlarrc.f | 6 +++--- SRC/dlarre.f | 4 ++-- SRC/slarra.f | 2 +- SRC/slarrc.f | 6 +++--- SRC/slarre.f | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/SRC/dlarra.f b/SRC/dlarra.f index 2fb30cd762..a62a079da0 100644 --- a/SRC/dlarra.f +++ b/SRC/dlarra.f @@ -164,6 +164,7 @@ SUBROUTINE DLARRA( N, D, E, E2, SPLTOL, TNRM, * .. Executable Statements .. * INFO = 0 + NSPLIT = 1 * * Quick return if possible * @@ -172,7 +173,6 @@ SUBROUTINE DLARRA( N, D, E, E2, SPLTOL, TNRM, END IF * * Compute splitting points - NSPLIT = 1 IF(SPLTOL.LT.ZERO) THEN * Criterion based on absolute off-diagonal value TMP1 = ABS(SPLTOL)* TNRM diff --git a/SRC/dlarrc.f b/SRC/dlarrc.f index 55a17626ac..d3fea59666 100644 --- a/SRC/dlarrc.f +++ b/SRC/dlarrc.f @@ -167,6 +167,9 @@ SUBROUTINE DLARRC( JOBT, N, VL, VU, D, E, PIVMIN, * .. Executable Statements .. * INFO = 0 + LCNT = 0 + RCNT = 0 + EIGCNT = 0 * * Quick return if possible * @@ -174,9 +177,6 @@ SUBROUTINE DLARRC( JOBT, N, VL, VU, D, E, PIVMIN, RETURN END IF * - LCNT = 0 - RCNT = 0 - EIGCNT = 0 MATT = LSAME( JOBT, 'T' ) diff --git a/SRC/dlarre.f b/SRC/dlarre.f index afbfe6379c..70f59b8295 100644 --- a/SRC/dlarre.f +++ b/SRC/dlarre.f @@ -367,6 +367,8 @@ SUBROUTINE DLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, * INFO = 0 + NSPLIT = 0 + M = 0 * * Quick return if possible * @@ -384,8 +386,6 @@ SUBROUTINE DLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, IRANGE = INDRNG END IF - M = 0 - * Get machine constants SAFMIN = DLAMCH( 'S' ) EPS = DLAMCH( 'P' ) diff --git a/SRC/slarra.f b/SRC/slarra.f index be81b0f691..2e27383e13 100644 --- a/SRC/slarra.f +++ b/SRC/slarra.f @@ -164,6 +164,7 @@ SUBROUTINE SLARRA( N, D, E, E2, SPLTOL, TNRM, * .. Executable Statements .. * INFO = 0 + NSPLIT = 1 * * Quick return if possible * @@ -172,7 +173,6 @@ SUBROUTINE SLARRA( N, D, E, E2, SPLTOL, TNRM, END IF * * Compute splitting points - NSPLIT = 1 IF(SPLTOL.LT.ZERO) THEN * Criterion based on absolute off-diagonal value TMP1 = ABS(SPLTOL)* TNRM diff --git a/SRC/slarrc.f b/SRC/slarrc.f index 060051539f..2100d1b3dc 100644 --- a/SRC/slarrc.f +++ b/SRC/slarrc.f @@ -167,6 +167,9 @@ SUBROUTINE SLARRC( JOBT, N, VL, VU, D, E, PIVMIN, * .. Executable Statements .. * INFO = 0 + LCNT = 0 + RCNT = 0 + EIGCNT = 0 * * Quick return if possible * @@ -174,9 +177,6 @@ SUBROUTINE SLARRC( JOBT, N, VL, VU, D, E, PIVMIN, RETURN END IF * - LCNT = 0 - RCNT = 0 - EIGCNT = 0 MATT = LSAME( JOBT, 'T' ) diff --git a/SRC/slarre.f b/SRC/slarre.f index 34dd71fd9d..2e34ca5a6a 100644 --- a/SRC/slarre.f +++ b/SRC/slarre.f @@ -367,6 +367,8 @@ SUBROUTINE SLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, * INFO = 0 + NSPLIT = 0 + M = 0 * * Quick return if possible * @@ -384,8 +386,6 @@ SUBROUTINE SLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, IRANGE = INDRNG END IF - M = 0 - * Get machine constants SAFMIN = SLAMCH( 'S' ) EPS = SLAMCH( 'P' ) From b0b45774512b300947ab4553af7089641789464c Mon Sep 17 00:00:00 2001 From: "J.L.G. Pallero" Date: Fri, 13 Jan 2023 19:29:46 +0100 Subject: [PATCH 81/90] Warnings supression in some LAPACKE functions --- LAPACKE/src/lapacke_clarfb.c | 2 +- LAPACKE/src/lapacke_clarfb_work.c | 2 +- LAPACKE/src/lapacke_dlarfb.c | 2 +- LAPACKE/src/lapacke_dlarfb_work.c | 2 +- LAPACKE/src/lapacke_slarfb.c | 2 +- LAPACKE/src/lapacke_slarfb_work.c | 2 +- LAPACKE/src/lapacke_zlarfb.c | 2 +- LAPACKE/src/lapacke_zlarfb_work.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/LAPACKE/src/lapacke_clarfb.c b/LAPACKE/src/lapacke_clarfb.c index 8b1492becf..594774f88b 100644 --- a/LAPACKE/src/lapacke_clarfb.c +++ b/LAPACKE/src/lapacke_clarfb.c @@ -60,7 +60,7 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_clarfb_work.c b/LAPACKE/src/lapacke_clarfb_work.c index 90ff0851f0..488e17d4b7 100644 --- a/LAPACKE/src/lapacke_clarfb_work.c +++ b/LAPACKE/src/lapacke_clarfb_work.c @@ -81,7 +81,7 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); return info; } - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { info = -8; LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_dlarfb.c b/LAPACKE/src/lapacke_dlarfb.c index 82e8fae527..fa6cccb7a2 100644 --- a/LAPACKE/src/lapacke_dlarfb.c +++ b/LAPACKE/src/lapacke_dlarfb.c @@ -59,7 +59,7 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_dlarfb_work.c b/LAPACKE/src/lapacke_dlarfb_work.c index 1a68bf7624..78ac4cd608 100644 --- a/LAPACKE/src/lapacke_dlarfb_work.c +++ b/LAPACKE/src/lapacke_dlarfb_work.c @@ -80,7 +80,7 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); return info; } - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { info = -8; LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_slarfb.c b/LAPACKE/src/lapacke_slarfb.c index 892648f4b7..4b77d83904 100644 --- a/LAPACKE/src/lapacke_slarfb.c +++ b/LAPACKE/src/lapacke_slarfb.c @@ -59,7 +59,7 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_slarfb_work.c b/LAPACKE/src/lapacke_slarfb_work.c index d805a947ae..cc44466ad3 100644 --- a/LAPACKE/src/lapacke_slarfb_work.c +++ b/LAPACKE/src/lapacke_slarfb_work.c @@ -80,7 +80,7 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); return info; } - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { info = -8; LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_zlarfb.c b/LAPACKE/src/lapacke_zlarfb.c index 25cedb5063..21bf9a5ed4 100644 --- a/LAPACKE/src/lapacke_zlarfb.c +++ b/LAPACKE/src/lapacke_zlarfb.c @@ -60,7 +60,7 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_zlarfb_work.c b/LAPACKE/src/lapacke_zlarfb_work.c index 64eb05263a..adc5fb4d25 100644 --- a/LAPACKE/src/lapacke_zlarfb_work.c +++ b/LAPACKE/src/lapacke_zlarfb_work.c @@ -81,7 +81,7 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); return info; } - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { info = -8; LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); return info; From 60a37704067d79c898355e14e420b53a39c18825 Mon Sep 17 00:00:00 2001 From: langou Date: Fri, 13 Jan 2023 12:48:55 -0700 Subject: [PATCH 82/90] Revert "Warnings supression in some LAPACKE functions" --- LAPACKE/src/lapacke_clarfb.c | 2 +- LAPACKE/src/lapacke_clarfb_work.c | 2 +- LAPACKE/src/lapacke_dlarfb.c | 2 +- LAPACKE/src/lapacke_dlarfb_work.c | 2 +- LAPACKE/src/lapacke_slarfb.c | 2 +- LAPACKE/src/lapacke_slarfb_work.c | 2 +- LAPACKE/src/lapacke_zlarfb.c | 2 +- LAPACKE/src/lapacke_zlarfb_work.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/LAPACKE/src/lapacke_clarfb.c b/LAPACKE/src/lapacke_clarfb.c index 594774f88b..8b1492becf 100644 --- a/LAPACKE/src/lapacke_clarfb.c +++ b/LAPACKE/src/lapacke_clarfb.c @@ -60,7 +60,7 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_clarfb_work.c b/LAPACKE/src/lapacke_clarfb_work.c index 488e17d4b7..90ff0851f0 100644 --- a/LAPACKE/src/lapacke_clarfb_work.c +++ b/LAPACKE/src/lapacke_clarfb_work.c @@ -81,7 +81,7 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); return info; } - if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { info = -8; LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_dlarfb.c b/LAPACKE/src/lapacke_dlarfb.c index fa6cccb7a2..82e8fae527 100644 --- a/LAPACKE/src/lapacke_dlarfb.c +++ b/LAPACKE/src/lapacke_dlarfb.c @@ -59,7 +59,7 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_dlarfb_work.c b/LAPACKE/src/lapacke_dlarfb_work.c index 78ac4cd608..1a68bf7624 100644 --- a/LAPACKE/src/lapacke_dlarfb_work.c +++ b/LAPACKE/src/lapacke_dlarfb_work.c @@ -80,7 +80,7 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); return info; } - if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { info = -8; LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_slarfb.c b/LAPACKE/src/lapacke_slarfb.c index 4b77d83904..892648f4b7 100644 --- a/LAPACKE/src/lapacke_slarfb.c +++ b/LAPACKE/src/lapacke_slarfb.c @@ -59,7 +59,7 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_slarfb_work.c b/LAPACKE/src/lapacke_slarfb_work.c index cc44466ad3..d805a947ae 100644 --- a/LAPACKE/src/lapacke_slarfb_work.c +++ b/LAPACKE/src/lapacke_slarfb_work.c @@ -80,7 +80,7 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); return info; } - if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { info = -8; LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_zlarfb.c b/LAPACKE/src/lapacke_zlarfb.c index 21bf9a5ed4..25cedb5063 100644 --- a/LAPACKE/src/lapacke_zlarfb.c +++ b/LAPACKE/src/lapacke_zlarfb.c @@ -60,7 +60,7 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_zlarfb_work.c b/LAPACKE/src/lapacke_zlarfb_work.c index adc5fb4d25..64eb05263a 100644 --- a/LAPACKE/src/lapacke_zlarfb_work.c +++ b/LAPACKE/src/lapacke_zlarfb_work.c @@ -81,7 +81,7 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); return info; } - if( ( !forward && ( col && k > nrows_v ) ) || ( !col && k > ncols_v )) { + if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { info = -8; LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); return info; From 08f7efeba8441e93e766353fb2c53d251a0b511a Mon Sep 17 00:00:00 2001 From: "J.L.G. Pallero" Date: Fri, 13 Jan 2023 23:39:24 +0100 Subject: [PATCH 83/90] Warnings supression and deletion of unnecessary check in some LAPACKE xLARFB functions --- LAPACKE/src/lapacke_clarfb.c | 2 +- LAPACKE/src/lapacke_clarfb_work.c | 2 +- LAPACKE/src/lapacke_dlarfb.c | 2 +- LAPACKE/src/lapacke_dlarfb_work.c | 2 +- LAPACKE/src/lapacke_slarfb.c | 2 +- LAPACKE/src/lapacke_slarfb_work.c | 2 +- LAPACKE/src/lapacke_zlarfb.c | 2 +- LAPACKE/src/lapacke_zlarfb_work.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/LAPACKE/src/lapacke_clarfb.c b/LAPACKE/src/lapacke_clarfb.c index 8b1492becf..ed12b476eb 100644 --- a/LAPACKE/src/lapacke_clarfb.c +++ b/LAPACKE/src/lapacke_clarfb.c @@ -60,7 +60,7 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_clarfb_work.c b/LAPACKE/src/lapacke_clarfb_work.c index 90ff0851f0..545769b83c 100644 --- a/LAPACKE/src/lapacke_clarfb_work.c +++ b/LAPACKE/src/lapacke_clarfb_work.c @@ -81,7 +81,7 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); return info; } - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { info = -8; LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_dlarfb.c b/LAPACKE/src/lapacke_dlarfb.c index 82e8fae527..f4ddc62a58 100644 --- a/LAPACKE/src/lapacke_dlarfb.c +++ b/LAPACKE/src/lapacke_dlarfb.c @@ -59,7 +59,7 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_dlarfb_work.c b/LAPACKE/src/lapacke_dlarfb_work.c index 1a68bf7624..de444c1466 100644 --- a/LAPACKE/src/lapacke_dlarfb_work.c +++ b/LAPACKE/src/lapacke_dlarfb_work.c @@ -80,7 +80,7 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); return info; } - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { info = -8; LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_slarfb.c b/LAPACKE/src/lapacke_slarfb.c index 892648f4b7..d36958f93d 100644 --- a/LAPACKE/src/lapacke_slarfb.c +++ b/LAPACKE/src/lapacke_slarfb.c @@ -59,7 +59,7 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_slarfb_work.c b/LAPACKE/src/lapacke_slarfb_work.c index d805a947ae..8b61276337 100644 --- a/LAPACKE/src/lapacke_slarfb_work.c +++ b/LAPACKE/src/lapacke_slarfb_work.c @@ -80,7 +80,7 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); return info; } - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { info = -8; LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); return info; diff --git a/LAPACKE/src/lapacke_zlarfb.c b/LAPACKE/src/lapacke_zlarfb.c index 25cedb5063..85355b202d 100644 --- a/LAPACKE/src/lapacke_zlarfb.c +++ b/LAPACKE/src/lapacke_zlarfb.c @@ -60,7 +60,7 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); return -8; } diff --git a/LAPACKE/src/lapacke_zlarfb_work.c b/LAPACKE/src/lapacke_zlarfb_work.c index 64eb05263a..72d85ec82a 100644 --- a/LAPACKE/src/lapacke_zlarfb_work.c +++ b/LAPACKE/src/lapacke_zlarfb_work.c @@ -81,7 +81,7 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); return info; } - if( !forward && ( col && k > nrows_v ) || ( !col && k > ncols_v )) { + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { info = -8; LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); return info; From 9f7c029040ab559daaa056c6d6b0bca78b1ce256 Mon Sep 17 00:00:00 2001 From: Dmitry Klyuchinsky Date: Wed, 8 Feb 2023 14:46:12 +0700 Subject: [PATCH 84/90] remove redundant space from xerbla call sbgv/hbgv --- SRC/chbgv.f | 2 +- SRC/dsbgv.f | 2 +- SRC/ssbgv.f | 2 +- SRC/zhbgv.f | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SRC/chbgv.f b/SRC/chbgv.f index 130594a0f2..3f84ff5d96 100644 --- a/SRC/chbgv.f +++ b/SRC/chbgv.f @@ -235,7 +235,7 @@ SUBROUTINE CHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, INFO = -12 END IF IF( INFO.NE.0 ) THEN - CALL XERBLA( 'CHBGV ', -INFO ) + CALL XERBLA( 'CHBGV', -INFO ) RETURN END IF * diff --git a/SRC/dsbgv.f b/SRC/dsbgv.f index b49df0063e..9bb07a1fa2 100644 --- a/SRC/dsbgv.f +++ b/SRC/dsbgv.f @@ -228,7 +228,7 @@ SUBROUTINE DSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, INFO = -12 END IF IF( INFO.NE.0 ) THEN - CALL XERBLA( 'DSBGV ', -INFO ) + CALL XERBLA( 'DSBGV', -INFO ) RETURN END IF * diff --git a/SRC/ssbgv.f b/SRC/ssbgv.f index 0a7f73e0a8..8930867658 100644 --- a/SRC/ssbgv.f +++ b/SRC/ssbgv.f @@ -228,7 +228,7 @@ SUBROUTINE SSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, INFO = -12 END IF IF( INFO.NE.0 ) THEN - CALL XERBLA( 'SSBGV ', -INFO ) + CALL XERBLA( 'SSBGV', -INFO ) RETURN END IF * diff --git a/SRC/zhbgv.f b/SRC/zhbgv.f index 2d587bd027..297ad6da73 100644 --- a/SRC/zhbgv.f +++ b/SRC/zhbgv.f @@ -235,7 +235,7 @@ SUBROUTINE ZHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, INFO = -12 END IF IF( INFO.NE.0 ) THEN - CALL XERBLA( 'ZHBGV ', -INFO ) + CALL XERBLA( 'ZHBGV', -INFO ) RETURN END IF * From 65850558e0104c510be28af5066b570865cbc30f Mon Sep 17 00:00:00 2001 From: thijs Date: Fri, 10 Feb 2023 08:51:57 +0100 Subject: [PATCH 85/90] don't subtract N_DEFLATED from istop twice --- SRC/claqz0.f | 2 +- SRC/dlaqz0.f | 2 +- SRC/slaqz0.f | 2 +- SRC/zlaqz0.f | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SRC/claqz0.f b/SRC/claqz0.f index 2284fd65d9..29eed203ab 100644 --- a/SRC/claqz0.f +++ b/SRC/claqz0.f @@ -648,7 +648,7 @@ RECURSIVE SUBROUTINE CLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, NS = MIN( NSHIFTS, ISTOP-ISTART2 ) NS = MIN( NS, N_UNDEFLATED ) - SHIFTPOS = ISTOP-N_DEFLATED-N_UNDEFLATED+1 + SHIFTPOS = ISTOP-N_UNDEFLATED+1 IF ( MOD( LD, 6 ) .EQ. 0 ) THEN * diff --git a/SRC/dlaqz0.f b/SRC/dlaqz0.f index 1bf65fd601..be236987bd 100644 --- a/SRC/dlaqz0.f +++ b/SRC/dlaqz0.f @@ -682,7 +682,7 @@ RECURSIVE SUBROUTINE DLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, NS = MIN( NSHIFTS, ISTOP-ISTART2 ) NS = MIN( NS, N_UNDEFLATED ) - SHIFTPOS = ISTOP-N_DEFLATED-N_UNDEFLATED+1 + SHIFTPOS = ISTOP-N_UNDEFLATED+1 * * Shuffle shifts to put double shifts in front * This ensures that we don't split up a double shift diff --git a/SRC/slaqz0.f b/SRC/slaqz0.f index 15913be88c..b99113c0b8 100644 --- a/SRC/slaqz0.f +++ b/SRC/slaqz0.f @@ -678,7 +678,7 @@ RECURSIVE SUBROUTINE SLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, NS = MIN( NSHIFTS, ISTOP-ISTART2 ) NS = MIN( NS, N_UNDEFLATED ) - SHIFTPOS = ISTOP-N_DEFLATED-N_UNDEFLATED+1 + SHIFTPOS = ISTOP-N_UNDEFLATED+1 * * Shuffle shifts to put double shifts in front * This ensures that we don't split up a double shift diff --git a/SRC/zlaqz0.f b/SRC/zlaqz0.f index 2616f20b5b..9ea6f20178 100644 --- a/SRC/zlaqz0.f +++ b/SRC/zlaqz0.f @@ -649,7 +649,7 @@ RECURSIVE SUBROUTINE ZLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, NS = MIN( NSHIFTS, ISTOP-ISTART2 ) NS = MIN( NS, N_UNDEFLATED ) - SHIFTPOS = ISTOP-N_DEFLATED-N_UNDEFLATED+1 + SHIFTPOS = ISTOP-N_UNDEFLATED+1 IF ( MOD( LD, 6 ) .EQ. 0 ) THEN * From 5fd6d647345f27f0239e114effe9d1012c30e92d Mon Sep 17 00:00:00 2001 From: Dmitry Klyuchinsky Date: Tue, 14 Feb 2023 15:21:46 +0700 Subject: [PATCH 86/90] fix rfp test: divide rows of matrix by corresponding diagonal element for DIAG = U --- TESTING/LIN/cdrvrf3.f | 56 +++++++++++++++++++++++++++++++++---------- TESTING/LIN/ddrvrf3.f | 53 ++++++++++++++++++++++++++++++---------- TESTING/LIN/sdrvrf3.f | 53 ++++++++++++++++++++++++++++++---------- TESTING/LIN/zdrvrf3.f | 56 +++++++++++++++++++++++++++++++++---------- 4 files changed, 168 insertions(+), 50 deletions(-) diff --git a/TESTING/LIN/cdrvrf3.f b/TESTING/LIN/cdrvrf3.f index 1ca816979a..d0edf75e10 100644 --- a/TESTING/LIN/cdrvrf3.f +++ b/TESTING/LIN/cdrvrf3.f @@ -156,9 +156,10 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, REAL RESULT( NTESTS ) * .. * .. External Functions .. + LOGICAL LSAME REAL SLAMCH, CLANGE COMPLEX CLARND - EXTERNAL SLAMCH, CLARND, CLANGE + EXTERNAL SLAMCH, CLARND, CLANGE, LSAME * .. * .. External Subroutines .. EXTERNAL CTRTTF, CGEQRF, CGEQLF, CTFSM, CTRSM @@ -222,9 +223,9 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO 100 IALPHA = 1, 3 * - IF ( IALPHA.EQ. 1) THEN + IF ( IALPHA.EQ.1 ) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 2) THEN + ELSE IF ( IALPHA.EQ.2 ) THEN ALPHA = ONE ELSE ALPHA = CLARND( 4, ISEED ) @@ -263,7 +264,7 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, NA DO I = 1, NA - A( I, J) = CLARND( 4, ISEED ) + A( I, J ) = CLARND( 4, ISEED ) END DO END DO * @@ -276,6 +277,20 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL CGEQRF( NA, NA, A, LDA, TAU, + C_WORK_CGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO J = 1, NA + DO I = 1, J + A( I, J ) = A( I, J ) / + + ( 2.0 * A( J, J ) ) + END DO + END DO + END IF +* ELSE * * The case IUPLO.EQ.2 is when SIDE.EQ.'L' @@ -285,6 +300,20 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL CGELQF( NA, NA, A, LDA, TAU, + C_WORK_CGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO I = 1, NA + DO J = 1, I + A( I, J ) = A( I, J ) / + + ( 2.0 * A( I, I ) ) + END DO + END DO + END IF +* END IF * * After the QR factorization, the diagonal @@ -293,7 +322,8 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * value 1.0E+00. * DO J = 1, NA - A( J, J) = A(J,J) * CLARND( 5, ISEED ) + A( J, J ) = A( J, J ) * + + CLARND( 5, ISEED ) END DO * * Store a copy of A in RFP format (in ARF). @@ -307,8 +337,8 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = CLARND( 4, ISEED ) - B2( I, J) = B1( I, J) + B1( I, J ) = CLARND( 4, ISEED ) + B2( I, J ) = B1( I, J ) END DO END DO * @@ -331,24 +361,24 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = B2( I, J ) - B1( I, J ) + B1( I, J ) = B2( I, J ) - B1( I, J ) END DO END DO * - RESULT(1) = CLANGE( 'I', M, N, B1, LDA, + RESULT( 1 ) = CLANGE( 'I', M, N, B1, LDA, + S_WORK_CLANGE ) * - RESULT(1) = RESULT(1) / SQRT( EPS ) - + / MAX ( MAX( M, N), 1 ) + RESULT( 1 ) = RESULT( 1 ) / SQRT( EPS ) + + / MAX ( MAX( M, N ), 1 ) * - IF( RESULT(1).GE.THRESH ) THEN + IF( RESULT( 1 ).GE.THRESH ) THEN IF( NFAIL.EQ.0 ) THEN WRITE( NOUT, * ) WRITE( NOUT, FMT = 9999 ) END IF WRITE( NOUT, FMT = 9997 ) 'CTFSM', + CFORM, SIDE, UPLO, TRANS, DIAG, M, - + N, RESULT(1) + + N, RESULT( 1 ) NFAIL = NFAIL + 1 END IF * diff --git a/TESTING/LIN/ddrvrf3.f b/TESTING/LIN/ddrvrf3.f index 1c5d74aea0..ef823c2e75 100644 --- a/TESTING/LIN/ddrvrf3.f +++ b/TESTING/LIN/ddrvrf3.f @@ -153,8 +153,9 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Functions .. + LOGICAL LSAME DOUBLE PRECISION DLAMCH, DLANGE, DLARND - EXTERNAL DLAMCH, DLANGE, DLARND + EXTERNAL DLAMCH, DLANGE, DLARND, LSAME * .. * .. External Subroutines .. EXTERNAL DTRTTF, DGEQRF, DGEQLF, DTFSM, DTRSM @@ -218,9 +219,9 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO 100 IALPHA = 1, 3 * - IF ( IALPHA.EQ. 1) THEN + IF ( IALPHA.EQ.1 ) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 2) THEN + ELSE IF ( IALPHA.EQ.2 ) THEN ALPHA = ONE ELSE ALPHA = DLARND( 2, ISEED ) @@ -259,7 +260,7 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, NA DO I = 1, NA - A( I, J) = DLARND( 2, ISEED ) + A( I, J ) = DLARND( 2, ISEED ) END DO END DO * @@ -272,6 +273,20 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL DGEQRF( NA, NA, A, LDA, TAU, + D_WORK_DGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO J = 1, NA + DO I = 1, J + A( I, J ) = A( I, J ) / + + ( 2.0 * A( J, J ) ) + END DO + END DO + END IF +* ELSE * * The case IUPLO.EQ.2 is when SIDE.EQ.'L' @@ -281,6 +296,20 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL DGELQF( NA, NA, A, LDA, TAU, + D_WORK_DGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO I = 1, NA + DO J = 1, I + A( I, J ) = A( I, J ) / + + ( 2.0 * A( I, I ) ) + END DO + END DO + END IF +* END IF * * Store a copy of A in RFP format (in ARF). @@ -294,8 +323,8 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = DLARND( 2, ISEED ) - B2( I, J) = B1( I, J) + B1( I, J ) = DLARND( 2, ISEED ) + B2( I, J ) = B1( I, J ) END DO END DO * @@ -318,24 +347,24 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = B2( I, J ) - B1( I, J ) + B1( I, J ) = B2( I, J ) - B1( I, J ) END DO END DO * - RESULT(1) = DLANGE( 'I', M, N, B1, LDA, + RESULT( 1 ) = DLANGE( 'I', M, N, B1, LDA, + D_WORK_DLANGE ) * - RESULT(1) = RESULT(1) / SQRT( EPS ) - + / MAX ( MAX( M, N), 1 ) + RESULT( 1 ) = RESULT( 1 ) / SQRT( EPS ) + + / MAX ( MAX( M, N ), 1 ) * - IF( RESULT(1).GE.THRESH ) THEN + IF( RESULT( 1 ).GE.THRESH ) THEN IF( NFAIL.EQ.0 ) THEN WRITE( NOUT, * ) WRITE( NOUT, FMT = 9999 ) END IF WRITE( NOUT, FMT = 9997 ) 'DTFSM', + CFORM, SIDE, UPLO, TRANS, DIAG, M, - + N, RESULT(1) + + N, RESULT( 1 ) NFAIL = NFAIL + 1 END IF * diff --git a/TESTING/LIN/sdrvrf3.f b/TESTING/LIN/sdrvrf3.f index 5faae27337..bc01d8473b 100644 --- a/TESTING/LIN/sdrvrf3.f +++ b/TESTING/LIN/sdrvrf3.f @@ -153,8 +153,9 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, REAL RESULT( NTESTS ) * .. * .. External Functions .. + LOGICAL LSAME REAL SLAMCH, SLANGE, SLARND - EXTERNAL SLAMCH, SLANGE, SLARND + EXTERNAL SLAMCH, SLANGE, SLARND, LSAME * .. * .. External Subroutines .. EXTERNAL STRTTF, SGEQRF, SGEQLF, STFSM, STRSM @@ -218,9 +219,9 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO 100 IALPHA = 1, 3 * - IF ( IALPHA.EQ. 1) THEN + IF ( IALPHA.EQ.1 ) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 2) THEN + ELSE IF ( IALPHA.EQ.2 ) THEN ALPHA = ONE ELSE ALPHA = SLARND( 2, ISEED ) @@ -259,7 +260,7 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, NA DO I = 1, NA - A( I, J) = SLARND( 2, ISEED ) + A( I, J ) = SLARND( 2, ISEED ) END DO END DO * @@ -272,6 +273,20 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL SGEQRF( NA, NA, A, LDA, TAU, + S_WORK_SGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO J = 1, NA + DO I = 1, J + A( I, J ) = A( I, J ) / + + ( 2.0 * A( J, J ) ) + END DO + END DO + END IF +* ELSE * * The case IUPLO.EQ.2 is when SIDE.EQ.'L' @@ -281,6 +296,20 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL SGELQF( NA, NA, A, LDA, TAU, + S_WORK_SGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO I = 1, NA + DO J = 1, I + A( I, J ) = A( I, J ) / + + ( 2.0 * A( I, I ) ) + END DO + END DO + END IF +* END IF * * Store a copy of A in RFP format (in ARF). @@ -294,8 +323,8 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = SLARND( 2, ISEED ) - B2( I, J) = B1( I, J) + B1( I, J ) = SLARND( 2, ISEED ) + B2( I, J ) = B1( I, J ) END DO END DO * @@ -318,24 +347,24 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = B2( I, J ) - B1( I, J ) + B1( I, J ) = B2( I, J ) - B1( I, J ) END DO END DO * - RESULT(1) = SLANGE( 'I', M, N, B1, LDA, + RESULT( 1 ) = SLANGE( 'I', M, N, B1, LDA, + S_WORK_SLANGE ) * - RESULT(1) = RESULT(1) / SQRT( EPS ) - + / MAX ( MAX( M, N), 1 ) + RESULT( 1 ) = RESULT( 1 ) / SQRT( EPS ) + + / MAX ( MAX( M, N ), 1 ) * - IF( RESULT(1).GE.THRESH ) THEN + IF( RESULT( 1 ).GE.THRESH ) THEN IF( NFAIL.EQ.0 ) THEN WRITE( NOUT, * ) WRITE( NOUT, FMT = 9999 ) END IF WRITE( NOUT, FMT = 9997 ) 'STFSM', + CFORM, SIDE, UPLO, TRANS, DIAG, M, - + N, RESULT(1) + + N, RESULT( 1 ) NFAIL = NFAIL + 1 END IF * diff --git a/TESTING/LIN/zdrvrf3.f b/TESTING/LIN/zdrvrf3.f index 7a44dba29f..4e55b03ef7 100644 --- a/TESTING/LIN/zdrvrf3.f +++ b/TESTING/LIN/zdrvrf3.f @@ -156,9 +156,10 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Functions .. + LOGICAL LSAME DOUBLE PRECISION DLAMCH, ZLANGE COMPLEX*16 ZLARND - EXTERNAL DLAMCH, ZLARND, ZLANGE + EXTERNAL DLAMCH, ZLARND, ZLANGE, LSAME * .. * .. External Subroutines .. EXTERNAL ZTRTTF, ZGEQRF, ZGEQLF, ZTFSM, ZTRSM @@ -222,9 +223,9 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO 100 IALPHA = 1, 3 * - IF ( IALPHA.EQ. 1) THEN + IF ( IALPHA.EQ.1 ) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 2) THEN + ELSE IF ( IALPHA.EQ.2 ) THEN ALPHA = ONE ELSE ALPHA = ZLARND( 4, ISEED ) @@ -263,7 +264,7 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, NA DO I = 1, NA - A( I, J) = ZLARND( 4, ISEED ) + A( I, J ) = ZLARND( 4, ISEED ) END DO END DO * @@ -276,6 +277,20 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL ZGEQRF( NA, NA, A, LDA, TAU, + Z_WORK_ZGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO J = 1, NA + DO I = 1, J + A( I, J ) = A( I, J ) / + + ( 2.0 * A( J, J ) ) + END DO + END DO + END IF +* ELSE * * The case IUPLO.EQ.2 is when SIDE.EQ.'L' @@ -285,6 +300,20 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL ZGELQF( NA, NA, A, LDA, TAU, + Z_WORK_ZGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO I = 1, NA + DO J = 1, I + A( I, J ) = A( I, J ) / + + ( 2.0 * A( I, I ) ) + END DO + END DO + END IF +* END IF * * After the QR factorization, the diagonal @@ -293,7 +322,8 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * value 1.0E+00. * DO J = 1, NA - A( J, J) = A(J,J) * ZLARND( 5, ISEED ) + A( J, J ) = A( J, J ) * + + ZLARND( 5, ISEED ) END DO * * Store a copy of A in RFP format (in ARF). @@ -307,8 +337,8 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = ZLARND( 4, ISEED ) - B2( I, J) = B1( I, J) + B1( I, J ) = ZLARND( 4, ISEED ) + B2( I, J ) = B1( I, J ) END DO END DO * @@ -331,24 +361,24 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = B2( I, J ) - B1( I, J ) + B1( I, J ) = B2( I, J ) - B1( I, J ) END DO END DO * - RESULT(1) = ZLANGE( 'I', M, N, B1, LDA, + RESULT( 1 ) = ZLANGE( 'I', M, N, B1, LDA, + D_WORK_ZLANGE ) * - RESULT(1) = RESULT(1) / SQRT( EPS ) - + / MAX ( MAX( M, N), 1 ) + RESULT( 1 ) = RESULT( 1 ) / SQRT( EPS ) + + / MAX ( MAX( M, N ), 1 ) * - IF( RESULT(1).GE.THRESH ) THEN + IF( RESULT( 1 ).GE.THRESH ) THEN IF( NFAIL.EQ.0 ) THEN WRITE( NOUT, * ) WRITE( NOUT, FMT = 9999 ) END IF WRITE( NOUT, FMT = 9997 ) 'ZTFSM', + CFORM, SIDE, UPLO, TRANS, DIAG, M, - + N, RESULT(1) + + N, RESULT( 1 ) NFAIL = NFAIL + 1 END IF * From ef12723763577e826f6b0b62767d38ab4ae6f017 Mon Sep 17 00:00:00 2001 From: EduardFedorenkov Date: Fri, 17 Feb 2023 15:32:00 +0700 Subject: [PATCH 87/90] fix bug in complex precision tests (c|z)het21 --- TESTING/EIG/chet21.f | 4 ++-- TESTING/EIG/zhet21.f | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/TESTING/EIG/chet21.f b/TESTING/EIG/chet21.f index a274681c98..1bd35cb41c 100644 --- a/TESTING/EIG/chet21.f +++ b/TESTING/EIG/chet21.f @@ -304,9 +304,9 @@ SUBROUTINE CHET21( ITYPE, UPLO, N, KBAND, A, LDA, D, E, U, LDU, V, 10 CONTINUE * IF( N.GT.1 .AND. KBAND.EQ.1 ) THEN - DO 20 J = 2, N - 1 + DO 20 J = 1, N - 1 CALL CHER2( CUPLO, N, -CMPLX( E( J ) ), U( 1, J ), 1, - $ U( 1, J-1 ), 1, WORK, N ) + $ U( 1, J+1 ), 1, WORK, N ) 20 CONTINUE END IF WNORM = CLANHE( '1', CUPLO, N, WORK, N, RWORK ) diff --git a/TESTING/EIG/zhet21.f b/TESTING/EIG/zhet21.f index d254f85e9c..b927a502fa 100644 --- a/TESTING/EIG/zhet21.f +++ b/TESTING/EIG/zhet21.f @@ -304,9 +304,9 @@ SUBROUTINE ZHET21( ITYPE, UPLO, N, KBAND, A, LDA, D, E, U, LDU, V, 10 CONTINUE * IF( N.GT.1 .AND. KBAND.EQ.1 ) THEN - DO 20 J = 2, N - 1 + DO 20 J = 1, N - 1 CALL ZHER2( CUPLO, N, -DCMPLX( E( J ) ), U( 1, J ), 1, - $ U( 1, J-1 ), 1, WORK, N ) + $ U( 1, J+1 ), 1, WORK, N ) 20 CONTINUE END IF WNORM = ZLANHE( '1', CUPLO, N, WORK, N, RWORK ) From cda0ad3d9aab453457742bdc6d8389920ddcaa1d Mon Sep 17 00:00:00 2001 From: Angelika Schwarz Date: Sun, 5 Mar 2023 18:27:04 +0100 Subject: [PATCH 88/90] Fix typos in documentation of GEJSV --- SRC/cgejsv.f | 6 +++--- SRC/dgejsv.f | 4 ++-- SRC/sgejsv.f | 32 ++++++++++++++++---------------- SRC/zgejsv.f | 6 +++--- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/SRC/cgejsv.f b/SRC/cgejsv.f index e37b25b6b2..062ac182b1 100644 --- a/SRC/cgejsv.f +++ b/SRC/cgejsv.f @@ -304,7 +304,7 @@ *> -> the minimal requirement is LWORK >= 3*N. *> -> For optimal performance, *> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQF, *> CUNMLQ. In general, the optimal length LWORK is computed as *> LWORK >= max(N+LWORK(CGEQP3), N+LWORK(CGESVJ), *> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). @@ -313,7 +313,7 @@ *> -> the minimal requirement is LWORK >= 3*N. *> -> For optimal performance, *> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQF, *> CUNMLQ. In general, the optimal length LWORK is computed as *> LWORK >= max(N+LWORK(CGEQP3), LWORK(CPOCON), N+LWORK(CGESVJ), *> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). @@ -350,7 +350,7 @@ *> *> \param[out] RWORK *> \verbatim -*> RWORK is REAL array, dimension (MAX(7,LWORK)) +*> RWORK is REAL array, dimension (MAX(7,LRWORK)) *> On exit, *> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) *> such that SCALE*SVA(1:N) are the computed singular values diff --git a/SRC/dgejsv.f b/SRC/dgejsv.f index fc13f4a5fb..83d16c30e1 100644 --- a/SRC/dgejsv.f +++ b/SRC/dgejsv.f @@ -224,7 +224,7 @@ *> *> \param[out] U *> \verbatim -*> U is DOUBLE PRECISION array, dimension ( LDU, N ) +*> U is DOUBLE PRECISION array, dimension ( LDU, N ) or ( LDU, M ) *> If JOBU = 'U', then U contains on exit the M-by-N matrix of *> the left singular vectors. *> If JOBU = 'F', then U contains on exit the M-by-M matrix of @@ -271,7 +271,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> WORK is DOUBLE PRECISION array, dimension (MAX(7,LWORK)) *> On exit, if N > 0 .AND. M > 0 (else not referenced), *> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such *> that SCALE*SVA(1:N) are the computed singular values diff --git a/SRC/sgejsv.f b/SRC/sgejsv.f index 82ac6b94b5..923573bdb4 100644 --- a/SRC/sgejsv.f +++ b/SRC/sgejsv.f @@ -224,7 +224,7 @@ *> *> \param[out] U *> \verbatim -*> U is REAL array, dimension ( LDU, N ) +*> U is REAL array, dimension ( LDU, N ) or ( LDU, M ) *> If JOBU = 'U', then U contains on exit the M-by-N matrix of *> the left singular vectors. *> If JOBU = 'F', then U contains on exit the M-by-M matrix of @@ -271,7 +271,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension (LWORK) +*> WORK is REAL array, dimension (MAX(7,LWORK)) *> On exit, *> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such *> that SCALE*SVA(1:N) are the computed singular values @@ -318,36 +318,36 @@ *> LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement. *> ->> For optimal performance (blocked code) the optimal value *> is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal -*> block size for DGEQP3 and DGEQRF. +*> block size for SGEQP3 and SGEQRF. *> In general, optimal LWORK is computed as -*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7). +*> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SGEQRF), 7). *> -> .. an estimate of the scaled condition number of A is *> required (JOBA='E', 'G'). In this case, LWORK is the maximum *> of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7). *> ->> For optimal performance (blocked code) the optimal value *> is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7). *> In general, the optimal length LWORK is computed as -*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), -*> N+N*N+LWORK(DPOCON),7). +*> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SGEQRF), +*> N+N*N+LWORK(SPOCON),7). *> *> If SIGMA and the right singular vectors are needed (JOBV = 'V'), *> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). *> -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7), -*> where NB is the optimal block size for DGEQP3, DGEQRF, DGELQ, -*> DORMLQ. In general, the optimal length LWORK is computed as -*> LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON), -*> N+LWORK(DGELQ), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)). +*> where NB is the optimal block size for SGEQP3, SGEQRF, SGELQF, +*> SORMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(SGEQP3), N+LWORK(SPOCON), +*> N+LWORK(SGELQF), 2*N+LWORK(SGEQRF), N+LWORK(SORMLQ)). *> *> If SIGMA and the left singular vectors are needed *> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). *> -> For optimal performance: *> if JOBU = 'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7), *> if JOBU = 'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7), -*> where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR. +*> where NB is the optimal block size for SGEQP3, SGEQRF, SORMQR. *> In general, the optimal length LWORK is computed as -*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON), -*> 2*N+LWORK(DGEQRF), N+LWORK(DORMQR)). -*> Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or +*> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SPOCON), +*> 2*N+LWORK(SGEQRF), N+LWORK(SORMQR)). +*> Here LWORK(SORMQR) equals N*NB (for JOBU = 'U') or *> M*NB (for JOBU = 'F'). *> *> If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and @@ -357,12 +357,12 @@ *> LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6). *> -> For optimal performance, LWORK should be additionally *> larger than N+M*NB, where NB is the optimal block size -*> for DORMQR. +*> for SORMQR. *> \endverbatim *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array, dimension (M+3*N). +*> IWORK is INTEGER array, dimension (MAX(3,M+3*N)). *> On exit, *> IWORK(1) = the numerical rank determined after the initial *> QR factorization with pivoting. See the descriptions diff --git a/SRC/zgejsv.f b/SRC/zgejsv.f index d1106696c0..5134ea8912 100644 --- a/SRC/zgejsv.f +++ b/SRC/zgejsv.f @@ -304,7 +304,7 @@ *> -> the minimal requirement is LWORK >= 3*N. *> -> For optimal performance, *> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQF, *> ZUNMLQ. In general, the optimal length LWORK is computed as *> LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(ZGESVJ), *> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). @@ -313,7 +313,7 @@ *> -> the minimal requirement is LWORK >= 3*N. *> -> For optimal performance, *> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQF, *> ZUNMLQ. In general, the optimal length LWORK is computed as *> LWORK >= max(N+LWORK(ZGEQP3), LWORK(ZPOCON), N+LWORK(ZGESVJ), *> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). @@ -349,7 +349,7 @@ *> *> \param[out] RWORK *> \verbatim -*> RWORK is DOUBLE PRECISION array, dimension (MAX(7,LWORK)) +*> RWORK is DOUBLE PRECISION array, dimension (MAX(7,LRWORK)) *> On exit, *> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) *> such that SCALE*SVA(1:N) are the computed singular values From 4420a7de2dfc21065f4d0b53f72ca6fb4fcf1e8c Mon Sep 17 00:00:00 2001 From: Gabriela Gutierrez Date: Wed, 15 Mar 2023 10:45:20 -0300 Subject: [PATCH 89/90] Add minimum permissions to makefile.yml Signed-off-by: Gabriela Gutierrez --- .github/workflows/makefile.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index 2de3da1b77..a7a6315979 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -31,6 +31,9 @@ on: - '!**CMakeLists.txt' - '!**md' +permissions: + contents: read + env: CC: "gcc" FC: "gfortran" From 984abd056156d5b7f64d6807e50eed46962b7e81 Mon Sep 17 00:00:00 2001 From: Gabriela Gutierrez Date: Wed, 15 Mar 2023 10:45:44 -0300 Subject: [PATCH 90/90] Add minimum permissions to cmake.yml Signed-off-by: Gabriela Gutierrez --- .github/workflows/cmake.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index e9b6d1cff8..6af45ec07e 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -33,6 +33,9 @@ on: - '!**Makefile' - '!**md' +permissions: + contents: read + env: CFLAGS: "-Wall -pedantic" # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)