diff --git a/.appveyor.yml b/.appveyor.yml index 627755ba2a..f5c21313d9 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -11,18 +11,14 @@ skip_commits: # Add [av skip] to commit messages message: /\[av skip\]/ -cache: - - '%APPVEYOR_BUILD_FOLDER%\build' - environment: global: - CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 + CONDA_INSTALL_LOCN: C:\\Miniconda37-x64 install: - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - - conda config --set auto_update_conda false - - conda config --add channels conda-forge --force - - conda install --yes --quiet flang jom +# - conda config --set auto_update_conda false + - conda install -c conda-forge --yes --quiet flang jom - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index dee07d154a..6af45ec07e 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -33,6 +33,9 @@ on: - '!**Makefile' - '!**md' +permissions: + contents: read + env: CFLAGS: "-Wall -pedantic" # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) @@ -75,6 +78,7 @@ jobs: cmake -B build -D CMAKE_C_COMPILER="gcc-11" -D CMAKE_Fortran_COMPILER="gfortran-11" + -D USE_FLAT_NAMESPACE:BOOL=ON # - name: Use Unix Makefiles on Windows # if: ${{ matrix.os == 'windows-latest' }} diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index 2de3da1b77..a7a6315979 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -31,6 +31,9 @@ on: - '!**CMakeLists.txt' - '!**md' +permissions: + contents: read + env: CC: "gcc" FC: "gfortran" diff --git a/BLAS/SRC/crotg.f90 b/BLAS/SRC/crotg.f90 index 431f376d84..8704196408 100644 --- a/BLAS/SRC/crotg.f90 +++ b/BLAS/SRC/crotg.f90 @@ -1,4 +1,4 @@ -!> \brief \b CROTG +!> \brief \b CROTG generates a Givens rotation with real cosine and complex sine. ! ! =========== DOCUMENTATION =========== ! @@ -24,8 +24,8 @@ !> = 1 if x = 0 !> c = |a| / sqrt(|a|**2 + |b|**2) !> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2) -!> When a and b are real and r /= 0, the formulas simplify to !> r = sgn(a)*sqrt(|a|**2 + |b|**2) +!> When a and b are real and r /= 0, the formulas simplify to !> c = a / r !> s = b / r !> the same as in SROTG when |a| > |b|. When |b| >= |a|, the @@ -65,12 +65,9 @@ ! Authors: ! ======== ! -!> \author Edward Anderson, Lockheed Martin +!> \author Weslley Pereira, University of Colorado Denver, USA ! -!> \par Contributors: -! ================== -!> -!> Weslley Pereira, University of Colorado Denver, USA +!> \date December 2021 ! !> \ingroup single_blas_level1 ! @@ -79,6 +76,8 @@ !> !> \verbatim !> +!> Based on the algorithm from +!> !> Anderson E. (2017) !> Algorithm 978: Safe Scaling in the Level 1 BLAS !> ACM Trans Math Softw 44:1--28 @@ -108,21 +107,14 @@ subroutine CROTG( a, b, c, s ) 1-minexponent(0._wp), & maxexponent(0._wp)-1 & ) - real(wp), parameter :: rtmin = sqrt( real(radix(0._wp),wp)**max( & - minexponent(0._wp)-1, & - 1-maxexponent(0._wp) & - ) / epsilon(0._wp) ) - real(wp), parameter :: rtmax = sqrt( real(radix(0._wp),wp)**max( & - 1-minexponent(0._wp), & - maxexponent(0._wp)-1 & - ) * epsilon(0._wp) ) + real(wp), parameter :: rtmin = sqrt( safmin ) ! .. ! .. Scalar Arguments .. real(wp) :: c complex(wp) :: a, b, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax complex(wp) :: f, fs, g, gs, r, t ! .. ! .. Intrinsic Functions .. @@ -144,30 +136,43 @@ subroutine CROTG( a, b, c, s ) r = f else if( f == czero ) then c = zero - g1 = max( abs(real(g)), abs(aimag(g)) ) - if( g1 > rtmin .and. g1 < rtmax ) then + if( real(g) == zero ) then + r = abs(aimag(g)) + s = conjg( g ) / r + elseif( aimag(g) == zero ) then + r = abs(real(g)) + s = conjg( g ) / r + else + g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) + if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) - s = conjg( g ) / d - r = d - else +! The following two lines can be replaced by `d = abs( g )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else ! ! Use scaled algorithm ! - u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) - s = conjg( gs ) / d - r = d*u + u = min( safmax, max( safmin, g1 ) ) + gs = g / u +! The following two lines can be replaced by `d = abs( gs )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if end if else f1 = max( abs(real(f)), abs(aimag(f)) ) g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/4 ) if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then ! @@ -176,32 +181,51 @@ subroutine CROTG( a, b, c, s ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = f / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( g ) * ( f / sqrt( f2*h2 ) ) + else + s = conjg( g ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = f / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = f * ( h2 / d ) + end if + s = conjg( g ) * ( f / d ) end if - p = 1 / d - c = f2*p - s = conjg( g )*( f*p ) - r = f*( h2*p ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 / u < rtmin ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -209,19 +233,43 @@ subroutine CROTG( a, b, c, s ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = fs / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = fs / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = fs * ( h2 / d ) + end if + s = conjg( gs ) * ( fs / d ) end if - p = 1 / d - c = ( f2*p )*w - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u + ! Rescale c and r + c = c * w + r = r * u end if end if a = r diff --git a/BLAS/SRC/zrotg.f90 b/BLAS/SRC/zrotg.f90 index dab6c26e23..b3c23be42d 100644 --- a/BLAS/SRC/zrotg.f90 +++ b/BLAS/SRC/zrotg.f90 @@ -1,4 +1,4 @@ -!> \brief \b ZROTG +!> \brief \b ZROTG generates a Givens rotation with real cosine and complex sine. ! ! =========== DOCUMENTATION =========== ! @@ -24,8 +24,8 @@ !> = 1 if x = 0 !> c = |a| / sqrt(|a|**2 + |b|**2) !> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2) -!> When a and b are real and r /= 0, the formulas simplify to !> r = sgn(a)*sqrt(|a|**2 + |b|**2) +!> When a and b are real and r /= 0, the formulas simplify to !> c = a / r !> s = b / r !> the same as in DROTG when |a| > |b|. When |b| >= |a|, the @@ -65,12 +65,9 @@ ! Authors: ! ======== ! -!> \author Edward Anderson, Lockheed Martin +!> \author Weslley Pereira, University of Colorado Denver, USA ! -!> \par Contributors: -! ================== -!> -!> Weslley Pereira, University of Colorado Denver, USA +!> \date December 2021 ! !> \ingroup single_blas_level1 ! @@ -79,6 +76,8 @@ !> !> \verbatim !> +!> Based on the algorithm from +!> !> Anderson E. (2017) !> Algorithm 978: Safe Scaling in the Level 1 BLAS !> ACM Trans Math Softw 44:1--28 @@ -108,21 +107,14 @@ subroutine ZROTG( a, b, c, s ) 1-minexponent(0._wp), & maxexponent(0._wp)-1 & ) - real(wp), parameter :: rtmin = sqrt( real(radix(0._wp),wp)**max( & - minexponent(0._wp)-1, & - 1-maxexponent(0._wp) & - ) / epsilon(0._wp) ) - real(wp), parameter :: rtmax = sqrt( real(radix(0._wp),wp)**max( & - 1-minexponent(0._wp), & - maxexponent(0._wp)-1 & - ) * epsilon(0._wp) ) + real(wp), parameter :: rtmin = sqrt( safmin ) ! .. ! .. Scalar Arguments .. real(wp) :: c complex(wp) :: a, b, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax complex(wp) :: f, fs, g, gs, r, t ! .. ! .. Intrinsic Functions .. @@ -144,30 +136,43 @@ subroutine ZROTG( a, b, c, s ) r = f else if( f == czero ) then c = zero - g1 = max( abs(real(g)), abs(aimag(g)) ) - if( g1 > rtmin .and. g1 < rtmax ) then + if( real(g) == zero ) then + r = abs(aimag(g)) + s = conjg( g ) / r + elseif( aimag(g) == zero ) then + r = abs(real(g)) + s = conjg( g ) / r + else + g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) + if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) - s = conjg( g ) / d - r = d - else +! The following two lines can be replaced by `d = abs( g )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else ! ! Use scaled algorithm ! - u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) - s = conjg( gs ) / d - r = d*u + u = min( safmax, max( safmin, g1 ) ) + gs = g / u +! The following two lines can be replaced by `d = abs( gs )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if end if else f1 = max( abs(real(f)), abs(aimag(f)) ) g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/4 ) if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then ! @@ -176,32 +181,51 @@ subroutine ZROTG( a, b, c, s ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = f / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( g ) * ( f / sqrt( f2*h2 ) ) + else + s = conjg( g ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = f / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = f * ( h2 / d ) + end if + s = conjg( g ) * ( f / d ) end if - p = 1 / d - c = f2*p - s = conjg( g )*( f*p ) - r = f*( h2*p ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 / u < rtmin ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -209,19 +233,43 @@ subroutine ZROTG( a, b, c, s ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = fs / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = fs / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = fs * ( h2 / d ) + end if + s = conjg( gs ) * ( fs / d ) end if - p = 1 / d - c = ( f2*p )*w - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u + ! Rescale c and r + c = c * w + r = r * u end if end if a = r diff --git a/CBLAS/include/cblas.h b/CBLAS/include/cblas.h index f7d411571c..124baf17e8 100644 --- a/CBLAS/include/cblas.h +++ b/CBLAS/include/cblas.h @@ -608,7 +608,11 @@ void cblas_zher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const void *B, const CBLAS_INT ldb, const double beta, void *C, const CBLAS_INT ldc); -void cblas_xerbla(CBLAS_INT p, const char *rout, const char *form, ...); +void +#ifdef HAS_ATTRIBUTE_WEAK_SUPPORT +__attribute__((weak)) +#endif +cblas_xerbla(CBLAS_INT p, const char *rout, const char *form, ...); #ifdef __cplusplus } diff --git a/CBLAS/include/cblas_f77.h b/CBLAS/include/cblas_f77.h index 8d8e929873..9e8fbb6da7 100644 --- a/CBLAS/include/cblas_f77.h +++ b/CBLAS/include/cblas_f77.h @@ -46,162 +46,162 @@ * Level 1 BLAS */ -#define F77_xerbla_base F77_GLOBAL(xerbla,XERBLA) -#define F77_srotg_base F77_GLOBAL(srotg,SROTG) -#define F77_srotmg_base F77_GLOBAL(srotmg,SROTMG) -#define F77_srot_base F77_GLOBAL(srot,SROT) -#define F77_srotm_base F77_GLOBAL(srotm,SROTM) -#define F77_drotg_base F77_GLOBAL(drotg,DROTG) -#define F77_drotmg_base F77_GLOBAL(drotmg,DROTMG) -#define F77_drot_base F77_GLOBAL(drot,DROT) -#define F77_drotm_base F77_GLOBAL(drotm,DROTM) -#define F77_sswap_base F77_GLOBAL(sswap,SSWAP) -#define F77_scopy_base F77_GLOBAL(scopy,SCOPY) -#define F77_saxpy_base F77_GLOBAL(saxpy,SAXPY) -#define F77_isamax_sub_base F77_GLOBAL(isamaxsub,ISAMAXSUB) -#define F77_dswap_base F77_GLOBAL(dswap,DSWAP) -#define F77_dcopy_base F77_GLOBAL(dcopy,DCOPY) -#define F77_daxpy_base F77_GLOBAL(daxpy,DAXPY) -#define F77_idamax_sub_base F77_GLOBAL(idamaxsub,IDAMAXSUB) -#define F77_cswap_base F77_GLOBAL(cswap,CSWAP) -#define F77_ccopy_base F77_GLOBAL(ccopy,CCOPY) -#define F77_caxpy_base F77_GLOBAL(caxpy,CAXPY) -#define F77_icamax_sub_base F77_GLOBAL(icamaxsub,ICAMAXSUB) -#define F77_zswap_base F77_GLOBAL(zswap,ZSWAP) -#define F77_zcopy_base F77_GLOBAL(zcopy,ZCOPY) -#define F77_zaxpy_base F77_GLOBAL(zaxpy,ZAXPY) -#define F77_izamax_sub_base F77_GLOBAL(izamaxsub,IZAMAXSUB) -#define F77_sdot_sub_base F77_GLOBAL(sdotsub,SDOTSUB) -#define F77_ddot_sub_base F77_GLOBAL(ddotsub,DDOTSUB) -#define F77_dsdot_sub_base F77_GLOBAL(dsdotsub,DSDOTSUB) -#define F77_sscal_base F77_GLOBAL(sscal,SSCAL) -#define F77_dscal_base F77_GLOBAL(dscal,DSCAL) -#define F77_cscal_base F77_GLOBAL(cscal,CSCAL) -#define F77_zscal_base F77_GLOBAL(zscal,ZSCAL) -#define F77_csscal_base F77_GLOBAL(csscal,CSSCAL) -#define F77_zdscal_base F77_GLOBAL(zdscal,ZDSCAL) -#define F77_cdotu_sub_base F77_GLOBAL(cdotusub,CDOTUSUB) -#define F77_cdotc_sub_base F77_GLOBAL(cdotcsub,CDOTCSUB) -#define F77_zdotu_sub_base F77_GLOBAL(zdotusub,ZDOTUSUB) -#define F77_zdotc_sub_base F77_GLOBAL(zdotcsub,ZDOTCSUB) -#define F77_snrm2_sub_base F77_GLOBAL(snrm2sub,SNRM2SUB) -#define F77_sasum_sub_base F77_GLOBAL(sasumsub,SASUMSUB) -#define F77_dnrm2_sub_base F77_GLOBAL(dnrm2sub,DNRM2SUB) -#define F77_dasum_sub_base F77_GLOBAL(dasumsub,DASUMSUB) -#define F77_scnrm2_sub_base F77_GLOBAL(scnrm2sub,SCNRM2SUB) -#define F77_scasum_sub_base F77_GLOBAL(scasumsub,SCASUMSUB) -#define F77_dznrm2_sub_base F77_GLOBAL(dznrm2sub,DZNRM2SUB) -#define F77_dzasum_sub_base F77_GLOBAL(dzasumsub,DZASUMSUB) -#define F77_sdsdot_sub_base F77_GLOBAL(sdsdotsub,SDSDOTSUB) -#define F77_crotg_base F77_GLOBAL(crotg, CROTG) -#define F77_csrot_base F77_GLOBAL(csrot, CSROT) -#define F77_zrotg_base F77_GLOBAL(zrotg, ZROTG) -#define F77_zdrot_base F77_GLOBAL(zdrot, ZDROT) -#define F77_scabs1_sub_base F77_GLOBAL(scabs1sub, SCABS1SUB) -#define F77_dcabs1_sub_base F77_GLOBAL(dcabs1sub, DCABS1SUB) +#define F77_xerbla_base F77_GLOBAL(xerbla,XERBLA) +#define F77_srotg_base F77_GLOBAL(srotg,SROTG) +#define F77_srotmg_base F77_GLOBAL(srotmg,SROTMG) +#define F77_srot_base F77_GLOBAL(srot,SROT) +#define F77_srotm_base F77_GLOBAL(srotm,SROTM) +#define F77_drotg_base F77_GLOBAL(drotg,DROTG) +#define F77_drotmg_base F77_GLOBAL(drotmg,DROTMG) +#define F77_drot_base F77_GLOBAL(drot,DROT) +#define F77_drotm_base F77_GLOBAL(drotm,DROTM) +#define F77_sswap_base F77_GLOBAL(sswap,SSWAP) +#define F77_scopy_base F77_GLOBAL(scopy,SCOPY) +#define F77_saxpy_base F77_GLOBAL(saxpy,SAXPY) +#define F77_isamax_sub_base F77_GLOBAL(isamaxsub,ISAMAXSUB) +#define F77_dswap_base F77_GLOBAL(dswap,DSWAP) +#define F77_dcopy_base F77_GLOBAL(dcopy,DCOPY) +#define F77_daxpy_base F77_GLOBAL(daxpy,DAXPY) +#define F77_idamax_sub_base F77_GLOBAL(idamaxsub,IDAMAXSUB) +#define F77_cswap_base F77_GLOBAL(cswap,CSWAP) +#define F77_ccopy_base F77_GLOBAL(ccopy,CCOPY) +#define F77_caxpy_base F77_GLOBAL(caxpy,CAXPY) +#define F77_icamax_sub_base F77_GLOBAL(icamaxsub,ICAMAXSUB) +#define F77_zswap_base F77_GLOBAL(zswap,ZSWAP) +#define F77_zcopy_base F77_GLOBAL(zcopy,ZCOPY) +#define F77_zaxpy_base F77_GLOBAL(zaxpy,ZAXPY) +#define F77_izamax_sub_base F77_GLOBAL(izamaxsub,IZAMAXSUB) +#define F77_sdot_sub_base F77_GLOBAL(sdotsub,SDOTSUB) +#define F77_ddot_sub_base F77_GLOBAL(ddotsub,DDOTSUB) +#define F77_dsdot_sub_base F77_GLOBAL(dsdotsub,DSDOTSUB) +#define F77_sscal_base F77_GLOBAL(sscal,SSCAL) +#define F77_dscal_base F77_GLOBAL(dscal,DSCAL) +#define F77_cscal_base F77_GLOBAL(cscal,CSCAL) +#define F77_zscal_base F77_GLOBAL(zscal,ZSCAL) +#define F77_csscal_base F77_GLOBAL(csscal,CSSCAL) +#define F77_zdscal_base F77_GLOBAL(zdscal,ZDSCAL) +#define F77_cdotu_sub_base F77_GLOBAL(cdotusub,CDOTUSUB) +#define F77_cdotc_sub_base F77_GLOBAL(cdotcsub,CDOTCSUB) +#define F77_zdotu_sub_base F77_GLOBAL(zdotusub,ZDOTUSUB) +#define F77_zdotc_sub_base F77_GLOBAL(zdotcsub,ZDOTCSUB) +#define F77_snrm2_sub_base F77_GLOBAL(snrm2sub,SNRM2SUB) +#define F77_sasum_sub_base F77_GLOBAL(sasumsub,SASUMSUB) +#define F77_dnrm2_sub_base F77_GLOBAL(dnrm2sub,DNRM2SUB) +#define F77_dasum_sub_base F77_GLOBAL(dasumsub,DASUMSUB) +#define F77_scnrm2_sub_base F77_GLOBAL(scnrm2sub,SCNRM2SUB) +#define F77_scasum_sub_base F77_GLOBAL(scasumsub,SCASUMSUB) +#define F77_dznrm2_sub_base F77_GLOBAL(dznrm2sub,DZNRM2SUB) +#define F77_dzasum_sub_base F77_GLOBAL(dzasumsub,DZASUMSUB) +#define F77_sdsdot_sub_base F77_GLOBAL(sdsdotsub,SDSDOTSUB) +#define F77_crotg_base F77_GLOBAL(crotg, CROTG) +#define F77_csrot_base F77_GLOBAL(csrot, CSROT) +#define F77_zrotg_base F77_GLOBAL(zrotg, ZROTG) +#define F77_zdrot_base F77_GLOBAL(zdrot, ZDROT) +#define F77_scabs1_sub_base F77_GLOBAL(scabs1sub, SCABS1SUB) +#define F77_dcabs1_sub_base F77_GLOBAL(dcabs1sub, DCABS1SUB) /* * Level 2 BLAS */ -#define F77_ssymv_base F77_GLOBAL(ssymv,SSYMV) -#define F77_ssbmv_base F77_GLOBAL(ssbmv,SSBMV) -#define F77_sspmv_base F77_GLOBAL(sspmv,SSPMV) -#define F77_sger_base F77_GLOBAL(sger,SGER) -#define F77_ssyr_base F77_GLOBAL(ssyr,SSYR) -#define F77_sspr_base F77_GLOBAL(sspr,SSPR) -#define F77_ssyr2_base F77_GLOBAL(ssyr2,SSYR2) -#define F77_sspr2_base F77_GLOBAL(sspr2,SSPR2) -#define F77_dsymv_base F77_GLOBAL(dsymv,DSYMV) -#define F77_dsbmv_base F77_GLOBAL(dsbmv,DSBMV) -#define F77_dspmv_base F77_GLOBAL(dspmv,DSPMV) -#define F77_dger_base F77_GLOBAL(dger,DGER) -#define F77_dsyr_base F77_GLOBAL(dsyr,DSYR) -#define F77_dspr_base F77_GLOBAL(dspr,DSPR) -#define F77_dsyr2_base F77_GLOBAL(dsyr2,DSYR2) -#define F77_dspr2_base F77_GLOBAL(dspr2,DSPR2) -#define F77_chemv_base F77_GLOBAL(chemv,CHEMV) -#define F77_chbmv_base F77_GLOBAL(chbmv,CHBMV) -#define F77_chpmv_base F77_GLOBAL(chpmv,CHPMV) -#define F77_cgeru_base F77_GLOBAL(cgeru,CGERU) -#define F77_cgerc_base F77_GLOBAL(cgerc,CGERC) -#define F77_cher_base F77_GLOBAL(cher,CHER) -#define F77_chpr_base F77_GLOBAL(chpr,CHPR) -#define F77_cher2_base F77_GLOBAL(cher2,CHER2) -#define F77_chpr2_base F77_GLOBAL(chpr2,CHPR2) -#define F77_zhemv_base F77_GLOBAL(zhemv,ZHEMV) -#define F77_zhbmv_base F77_GLOBAL(zhbmv,ZHBMV) -#define F77_zhpmv_base F77_GLOBAL(zhpmv,ZHPMV) -#define F77_zgeru_base F77_GLOBAL(zgeru,ZGERU) -#define F77_zgerc_base F77_GLOBAL(zgerc,ZGERC) -#define F77_zher_base F77_GLOBAL(zher,ZHER) -#define F77_zhpr_base F77_GLOBAL(zhpr,ZHPR) -#define F77_zher2_base F77_GLOBAL(zher2,ZHER2) -#define F77_zhpr2_base F77_GLOBAL(zhpr2,ZHPR2) -#define F77_sgemv_base F77_GLOBAL(sgemv,SGEMV) -#define F77_sgbmv_base F77_GLOBAL(sgbmv,SGBMV) -#define F77_strmv_base F77_GLOBAL(strmv,STRMV) -#define F77_stbmv_base F77_GLOBAL(stbmv,STBMV) -#define F77_stpmv_base F77_GLOBAL(stpmv,STPMV) -#define F77_strsv_base F77_GLOBAL(strsv,STRSV) -#define F77_stbsv_base F77_GLOBAL(stbsv,STBSV) -#define F77_stpsv_base F77_GLOBAL(stpsv,STPSV) -#define F77_dgemv_base F77_GLOBAL(dgemv,DGEMV) -#define F77_dgbmv_base F77_GLOBAL(dgbmv,DGBMV) -#define F77_dtrmv_base F77_GLOBAL(dtrmv,DTRMV) -#define F77_dtbmv_base F77_GLOBAL(dtbmv,DTBMV) -#define F77_dtpmv_base F77_GLOBAL(dtpmv,DTPMV) -#define F77_dtrsv_base F77_GLOBAL(dtrsv,DTRSV) -#define F77_dtbsv_base F77_GLOBAL(dtbsv,DTBSV) -#define F77_dtpsv_base F77_GLOBAL(dtpsv,DTPSV) -#define F77_cgemv_base F77_GLOBAL(cgemv,CGEMV) -#define F77_cgbmv_base F77_GLOBAL(cgbmv,CGBMV) -#define F77_ctrmv_base F77_GLOBAL(ctrmv,CTRMV) -#define F77_ctbmv_base F77_GLOBAL(ctbmv,CTBMV) -#define F77_ctpmv_base F77_GLOBAL(ctpmv,CTPMV) -#define F77_ctrsv_base F77_GLOBAL(ctrsv,CTRSV) -#define F77_ctbsv_base F77_GLOBAL(ctbsv,CTBSV) -#define F77_ctpsv_base F77_GLOBAL(ctpsv,CTPSV) -#define F77_zgemv_base F77_GLOBAL(zgemv,ZGEMV) -#define F77_zgbmv_base F77_GLOBAL(zgbmv,ZGBMV) -#define F77_ztrmv_base F77_GLOBAL(ztrmv,ZTRMV) -#define F77_ztbmv_base F77_GLOBAL(ztbmv,ZTBMV) -#define F77_ztpmv_base F77_GLOBAL(ztpmv,ZTPMV) -#define F77_ztrsv_base F77_GLOBAL(ztrsv,ZTRSV) -#define F77_ztbsv_base F77_GLOBAL(ztbsv,ZTBSV) -#define F77_ztpsv_base F77_GLOBAL(ztpsv,ZTPSV) +#define F77_ssymv_base F77_GLOBAL(ssymv,SSYMV) +#define F77_ssbmv_base F77_GLOBAL(ssbmv,SSBMV) +#define F77_sspmv_base F77_GLOBAL(sspmv,SSPMV) +#define F77_sger_base F77_GLOBAL(sger,SGER) +#define F77_ssyr_base F77_GLOBAL(ssyr,SSYR) +#define F77_sspr_base F77_GLOBAL(sspr,SSPR) +#define F77_ssyr2_base F77_GLOBAL(ssyr2,SSYR2) +#define F77_sspr2_base F77_GLOBAL(sspr2,SSPR2) +#define F77_dsymv_base F77_GLOBAL(dsymv,DSYMV) +#define F77_dsbmv_base F77_GLOBAL(dsbmv,DSBMV) +#define F77_dspmv_base F77_GLOBAL(dspmv,DSPMV) +#define F77_dger_base F77_GLOBAL(dger,DGER) +#define F77_dsyr_base F77_GLOBAL(dsyr,DSYR) +#define F77_dspr_base F77_GLOBAL(dspr,DSPR) +#define F77_dsyr2_base F77_GLOBAL(dsyr2,DSYR2) +#define F77_dspr2_base F77_GLOBAL(dspr2,DSPR2) +#define F77_chemv_base F77_GLOBAL(chemv,CHEMV) +#define F77_chbmv_base F77_GLOBAL(chbmv,CHBMV) +#define F77_chpmv_base F77_GLOBAL(chpmv,CHPMV) +#define F77_cgeru_base F77_GLOBAL(cgeru,CGERU) +#define F77_cgerc_base F77_GLOBAL(cgerc,CGERC) +#define F77_cher_base F77_GLOBAL(cher,CHER) +#define F77_chpr_base F77_GLOBAL(chpr,CHPR) +#define F77_cher2_base F77_GLOBAL(cher2,CHER2) +#define F77_chpr2_base F77_GLOBAL(chpr2,CHPR2) +#define F77_zhemv_base F77_GLOBAL(zhemv,ZHEMV) +#define F77_zhbmv_base F77_GLOBAL(zhbmv,ZHBMV) +#define F77_zhpmv_base F77_GLOBAL(zhpmv,ZHPMV) +#define F77_zgeru_base F77_GLOBAL(zgeru,ZGERU) +#define F77_zgerc_base F77_GLOBAL(zgerc,ZGERC) +#define F77_zher_base F77_GLOBAL(zher,ZHER) +#define F77_zhpr_base F77_GLOBAL(zhpr,ZHPR) +#define F77_zher2_base F77_GLOBAL(zher2,ZHER2) +#define F77_zhpr2_base F77_GLOBAL(zhpr2,ZHPR2) +#define F77_sgemv_base F77_GLOBAL(sgemv,SGEMV) +#define F77_sgbmv_base F77_GLOBAL(sgbmv,SGBMV) +#define F77_strmv_base F77_GLOBAL(strmv,STRMV) +#define F77_stbmv_base F77_GLOBAL(stbmv,STBMV) +#define F77_stpmv_base F77_GLOBAL(stpmv,STPMV) +#define F77_strsv_base F77_GLOBAL(strsv,STRSV) +#define F77_stbsv_base F77_GLOBAL(stbsv,STBSV) +#define F77_stpsv_base F77_GLOBAL(stpsv,STPSV) +#define F77_dgemv_base F77_GLOBAL(dgemv,DGEMV) +#define F77_dgbmv_base F77_GLOBAL(dgbmv,DGBMV) +#define F77_dtrmv_base F77_GLOBAL(dtrmv,DTRMV) +#define F77_dtbmv_base F77_GLOBAL(dtbmv,DTBMV) +#define F77_dtpmv_base F77_GLOBAL(dtpmv,DTPMV) +#define F77_dtrsv_base F77_GLOBAL(dtrsv,DTRSV) +#define F77_dtbsv_base F77_GLOBAL(dtbsv,DTBSV) +#define F77_dtpsv_base F77_GLOBAL(dtpsv,DTPSV) +#define F77_cgemv_base F77_GLOBAL(cgemv,CGEMV) +#define F77_cgbmv_base F77_GLOBAL(cgbmv,CGBMV) +#define F77_ctrmv_base F77_GLOBAL(ctrmv,CTRMV) +#define F77_ctbmv_base F77_GLOBAL(ctbmv,CTBMV) +#define F77_ctpmv_base F77_GLOBAL(ctpmv,CTPMV) +#define F77_ctrsv_base F77_GLOBAL(ctrsv,CTRSV) +#define F77_ctbsv_base F77_GLOBAL(ctbsv,CTBSV) +#define F77_ctpsv_base F77_GLOBAL(ctpsv,CTPSV) +#define F77_zgemv_base F77_GLOBAL(zgemv,ZGEMV) +#define F77_zgbmv_base F77_GLOBAL(zgbmv,ZGBMV) +#define F77_ztrmv_base F77_GLOBAL(ztrmv,ZTRMV) +#define F77_ztbmv_base F77_GLOBAL(ztbmv,ZTBMV) +#define F77_ztpmv_base F77_GLOBAL(ztpmv,ZTPMV) +#define F77_ztrsv_base F77_GLOBAL(ztrsv,ZTRSV) +#define F77_ztbsv_base F77_GLOBAL(ztbsv,ZTBSV) +#define F77_ztpsv_base F77_GLOBAL(ztpsv,ZTPSV) /* * Level 3 BLAS */ -#define F77_chemm_base F77_GLOBAL(chemm,CHEMM) -#define F77_cherk_base F77_GLOBAL(cherk,CHERK) -#define F77_cher2k_base F77_GLOBAL(cher2k,CHER2K) -#define F77_zhemm_base F77_GLOBAL(zhemm,ZHEMM) -#define F77_zherk_base F77_GLOBAL(zherk,ZHERK) -#define F77_zher2k_base F77_GLOBAL(zher2k,ZHER2K) -#define F77_sgemm_base F77_GLOBAL(sgemm,SGEMM) -#define F77_ssymm_base F77_GLOBAL(ssymm,SSYMM) -#define F77_ssyrk_base F77_GLOBAL(ssyrk,SSYRK) -#define F77_ssyr2k_base F77_GLOBAL(ssyr2k,SSYR2K) -#define F77_strmm_base F77_GLOBAL(strmm,STRMM) -#define F77_strsm_base F77_GLOBAL(strsm,STRSM) -#define F77_dgemm_base F77_GLOBAL(dgemm,DGEMM) -#define F77_dsymm_base F77_GLOBAL(dsymm,DSYMM) -#define F77_dsyrk_base F77_GLOBAL(dsyrk,DSYRK) -#define F77_dsyr2k_base F77_GLOBAL(dsyr2k,DSYR2K) -#define F77_dtrmm_base F77_GLOBAL(dtrmm,DTRMM) -#define F77_dtrsm_base F77_GLOBAL(dtrsm,DTRSM) -#define F77_cgemm_base F77_GLOBAL(cgemm,CGEMM) -#define F77_csymm_base F77_GLOBAL(csymm,CSYMM) -#define F77_csyrk_base F77_GLOBAL(csyrk,CSYRK) -#define F77_csyr2k_base F77_GLOBAL(csyr2k,CSYR2K) -#define F77_ctrmm_base F77_GLOBAL(ctrmm,CTRMM) -#define F77_ctrsm_base F77_GLOBAL(ctrsm,CTRSM) -#define F77_zgemm_base F77_GLOBAL(zgemm,ZGEMM) -#define F77_zsymm_base F77_GLOBAL(zsymm,ZSYMM) -#define F77_zsyrk_base F77_GLOBAL(zsyrk,ZSYRK) -#define F77_zsyr2k_base F77_GLOBAL(zsyr2k,ZSYR2K) -#define F77_ztrmm_base F77_GLOBAL(ztrmm,ZTRMM) -#define F77_ztrsm_base F77_GLOBAL(ztrsm,ZTRSM) +#define F77_chemm_base F77_GLOBAL(chemm,CHEMM) +#define F77_cherk_base F77_GLOBAL(cherk,CHERK) +#define F77_cher2k_base F77_GLOBAL(cher2k,CHER2K) +#define F77_zhemm_base F77_GLOBAL(zhemm,ZHEMM) +#define F77_zherk_base F77_GLOBAL(zherk,ZHERK) +#define F77_zher2k_base F77_GLOBAL(zher2k,ZHER2K) +#define F77_sgemm_base F77_GLOBAL(sgemm,SGEMM) +#define F77_ssymm_base F77_GLOBAL(ssymm,SSYMM) +#define F77_ssyrk_base F77_GLOBAL(ssyrk,SSYRK) +#define F77_ssyr2k_base F77_GLOBAL(ssyr2k,SSYR2K) +#define F77_strmm_base F77_GLOBAL(strmm,STRMM) +#define F77_strsm_base F77_GLOBAL(strsm,STRSM) +#define F77_dgemm_base F77_GLOBAL(dgemm,DGEMM) +#define F77_dsymm_base F77_GLOBAL(dsymm,DSYMM) +#define F77_dsyrk_base F77_GLOBAL(dsyrk,DSYRK) +#define F77_dsyr2k_base F77_GLOBAL(dsyr2k,DSYR2K) +#define F77_dtrmm_base F77_GLOBAL(dtrmm,DTRMM) +#define F77_dtrsm_base F77_GLOBAL(dtrsm,DTRSM) +#define F77_cgemm_base F77_GLOBAL(cgemm,CGEMM) +#define F77_csymm_base F77_GLOBAL(csymm,CSYMM) +#define F77_csyrk_base F77_GLOBAL(csyrk,CSYRK) +#define F77_csyr2k_base F77_GLOBAL(csyr2k,CSYR2K) +#define F77_ctrmm_base F77_GLOBAL(ctrmm,CTRMM) +#define F77_ctrsm_base F77_GLOBAL(ctrsm,CTRSM) +#define F77_zgemm_base F77_GLOBAL(zgemm,ZGEMM) +#define F77_zsymm_base F77_GLOBAL(zsymm,ZSYMM) +#define F77_zsyrk_base F77_GLOBAL(zsyrk,ZSYRK) +#define F77_zsyr2k_base F77_GLOBAL(zsyr2k,ZSYR2K) +#define F77_ztrmm_base F77_GLOBAL(ztrmm,ZTRMM) +#define F77_ztrsm_base F77_GLOBAL(ztrsm,ZTRSM) /* * Level 1 Fortran variadic definitions @@ -210,81 +210,81 @@ /* Single Precision */ - #define F77_srot(...) F77_srot_base(__VA_ARGS__) - #define F77_srotg(...) F77_srotg_base(__VA_ARGS__) - #define F77_srotm(...) F77_srotm_base(__VA_ARGS__) - #define F77_srotmg(...) F77_srotmg_base(__VA_ARGS__) - #define F77_sswap(...) F77_sswap_base(__VA_ARGS__) - #define F77_scopy(...) F77_scopy_base(__VA_ARGS__) - #define F77_saxpy(...) F77_saxpy_base(__VA_ARGS__) - #define F77_sdot_sub(...) F77_sdot_sub_base(__VA_ARGS__) - #define F77_sdsdot_sub(...) F77_sdsdot_sub_base(__VA_ARGS__) - #define F77_sscal(...) F77_sscal_base(__VA_ARGS__) - #define F77_snrm2_sub(...) F77_snrm2_sub_base(__VA_ARGS__) - #define F77_sasum_sub(...) F77_sasum_sub_base(__VA_ARGS__) - #define F77_isamax_sub(...) F77_isamax_sub_base(__VA_ARGS__) - #define F77_scabs1_sub(...) F77_scabs1_sub_base(__VA_ARGS__) +#define F77_srot(...) F77_srot_base(__VA_ARGS__) +#define F77_srotg(...) F77_srotg_base(__VA_ARGS__) +#define F77_srotm(...) F77_srotm_base(__VA_ARGS__) +#define F77_srotmg(...) F77_srotmg_base(__VA_ARGS__) +#define F77_sswap(...) F77_sswap_base(__VA_ARGS__) +#define F77_scopy(...) F77_scopy_base(__VA_ARGS__) +#define F77_saxpy(...) F77_saxpy_base(__VA_ARGS__) +#define F77_sdot_sub(...) F77_sdot_sub_base(__VA_ARGS__) +#define F77_sdsdot_sub(...) F77_sdsdot_sub_base(__VA_ARGS__) +#define F77_sscal(...) F77_sscal_base(__VA_ARGS__) +#define F77_snrm2_sub(...) F77_snrm2_sub_base(__VA_ARGS__) +#define F77_sasum_sub(...) F77_sasum_sub_base(__VA_ARGS__) +#define F77_isamax_sub(...) F77_isamax_sub_base(__VA_ARGS__) +#define F77_scabs1_sub(...) F77_scabs1_sub_base(__VA_ARGS__) /* Double Precision */ - #define F77_drot(...) F77_drot_base(__VA_ARGS__) - #define F77_drotg(...) F77_drotg_base(__VA_ARGS__) - #define F77_drotm(...) F77_drotm_base(__VA_ARGS__) - #define F77_drotmg(...) F77_drotmg_base(__VA_ARGS__) - #define F77_dswap(...) F77_dswap_base(__VA_ARGS__) - #define F77_dcopy(...) F77_dcopy_base(__VA_ARGS__) - #define F77_daxpy(...) F77_daxpy_base(__VA_ARGS__) - #define F77_dswap(...) F77_dswap_base(__VA_ARGS__) - #define F77_dsdot_sub(...) F77_dsdot_sub_base(__VA_ARGS__) - #define F77_ddot_sub(...) F77_ddot_sub_base(__VA_ARGS__) - #define F77_dscal(...) F77_dscal_base(__VA_ARGS__) - #define F77_dnrm2_sub(...) F77_dnrm2_sub_base(__VA_ARGS__) - #define F77_dasum_sub(...) F77_dasum_sub_base(__VA_ARGS__) - #define F77_idamax_sub(...) F77_idamax_sub_base(__VA_ARGS__) - #define F77_dcabs1_sub(...) F77_dcabs1_sub_base(__VA_ARGS__) +#define F77_drot(...) F77_drot_base(__VA_ARGS__) +#define F77_drotg(...) F77_drotg_base(__VA_ARGS__) +#define F77_drotm(...) F77_drotm_base(__VA_ARGS__) +#define F77_drotmg(...) F77_drotmg_base(__VA_ARGS__) +#define F77_dswap(...) F77_dswap_base(__VA_ARGS__) +#define F77_dcopy(...) F77_dcopy_base(__VA_ARGS__) +#define F77_daxpy(...) F77_daxpy_base(__VA_ARGS__) +#define F77_dswap(...) F77_dswap_base(__VA_ARGS__) +#define F77_dsdot_sub(...) F77_dsdot_sub_base(__VA_ARGS__) +#define F77_ddot_sub(...) F77_ddot_sub_base(__VA_ARGS__) +#define F77_dscal(...) F77_dscal_base(__VA_ARGS__) +#define F77_dnrm2_sub(...) F77_dnrm2_sub_base(__VA_ARGS__) +#define F77_dasum_sub(...) F77_dasum_sub_base(__VA_ARGS__) +#define F77_idamax_sub(...) F77_idamax_sub_base(__VA_ARGS__) +#define F77_dcabs1_sub(...) F77_dcabs1_sub_base(__VA_ARGS__) /* Single Complex Precision */ - #define F77_crotg(...) F77_crotg_base(__VA_ARGS__) - #define F77_csrot(...) F77_csrot_base(__VA_ARGS__) - #define F77_cswap(...) F77_cswap_base(__VA_ARGS__) - #define F77_ccopy(...) F77_ccopy_base(__VA_ARGS__) - #define F77_caxpy(...) F77_caxpy_base(__VA_ARGS__) - #define F77_cswap(...) F77_cswap_base(__VA_ARGS__) - #define F77_cdotc_sub(...) F77_cdotc_sub_base(__VA_ARGS__) - #define F77_cdotu_sub(...) F77_cdotu_sub_base(__VA_ARGS__) - #define F77_cscal(...) F77_cscal_base(__VA_ARGS__) - #define F77_icamax_sub(...) F77_icamax_sub_base(__VA_ARGS__) - #define F77_csscal(...) F77_csscal_base(__VA_ARGS__) - #define F77_scnrm2_sub(...) F77_scnrm2_sub_base(__VA_ARGS__) - #define F77_scasum_sub(...) F77_scasum_sub_base(__VA_ARGS__) +#define F77_crotg(...) F77_crotg_base(__VA_ARGS__) +#define F77_csrot(...) F77_csrot_base(__VA_ARGS__) +#define F77_cswap(...) F77_cswap_base(__VA_ARGS__) +#define F77_ccopy(...) F77_ccopy_base(__VA_ARGS__) +#define F77_caxpy(...) F77_caxpy_base(__VA_ARGS__) +#define F77_cswap(...) F77_cswap_base(__VA_ARGS__) +#define F77_cdotc_sub(...) F77_cdotc_sub_base(__VA_ARGS__) +#define F77_cdotu_sub(...) F77_cdotu_sub_base(__VA_ARGS__) +#define F77_cscal(...) F77_cscal_base(__VA_ARGS__) +#define F77_icamax_sub(...) F77_icamax_sub_base(__VA_ARGS__) +#define F77_csscal(...) F77_csscal_base(__VA_ARGS__) +#define F77_scnrm2_sub(...) F77_scnrm2_sub_base(__VA_ARGS__) +#define F77_scasum_sub(...) F77_scasum_sub_base(__VA_ARGS__) /* Double Complex Precision */ - #define F77_zrotg(...) F77_zrotg_base(__VA_ARGS__) - #define F77_zdrot(...) F77_zdrot_base(__VA_ARGS__) - #define F77_zswap(...) F77_zswap_base(__VA_ARGS__) - #define F77_zcopy(...) F77_zcopy_base(__VA_ARGS__) - #define F77_zaxpy(...) F77_zaxpy_base(__VA_ARGS__) - #define F77_zswap(...) F77_zswap_base(__VA_ARGS__) - #define F77_zdotc_sub(...) F77_zdotc_sub_base(__VA_ARGS__) - #define F77_zdotu_sub(...) F77_zdotu_sub_base(__VA_ARGS__) - #define F77_zdscal(...) F77_zdscal_base(__VA_ARGS__) - #define F77_zscal(...) F77_zscal_base(__VA_ARGS__) - #define F77_dznrm2_sub(...) F77_dznrm2_sub_base(__VA_ARGS__) - #define F77_dzasum_sub(...) F77_dzasum_sub_base(__VA_ARGS__) - #define F77_izamax_sub(...) F77_izamax_sub_base(__VA_ARGS__) +#define F77_zrotg(...) F77_zrotg_base(__VA_ARGS__) +#define F77_zdrot(...) F77_zdrot_base(__VA_ARGS__) +#define F77_zswap(...) F77_zswap_base(__VA_ARGS__) +#define F77_zcopy(...) F77_zcopy_base(__VA_ARGS__) +#define F77_zaxpy(...) F77_zaxpy_base(__VA_ARGS__) +#define F77_zswap(...) F77_zswap_base(__VA_ARGS__) +#define F77_zdotc_sub(...) F77_zdotc_sub_base(__VA_ARGS__) +#define F77_zdotu_sub(...) F77_zdotu_sub_base(__VA_ARGS__) +#define F77_zdscal(...) F77_zdscal_base(__VA_ARGS__) +#define F77_zscal(...) F77_zscal_base(__VA_ARGS__) +#define F77_dznrm2_sub(...) F77_dznrm2_sub_base(__VA_ARGS__) +#define F77_dzasum_sub(...) F77_dzasum_sub_base(__VA_ARGS__) +#define F77_izamax_sub(...) F77_izamax_sub_base(__VA_ARGS__) /* * Level 2 Fortran variadic definitions without FCHAR */ - #define F77_sger(...) F77_sger_base(__VA_ARGS__) - #define F77_dger(...) F77_dger_base(__VA_ARGS__) - #define F77_cgerc(...) F77_cgerc_base(__VA_ARGS__) - #define F77_cgeru(...) F77_cgeru_base(__VA_ARGS__) - #define F77_zgerc(...) F77_zgerc_base(__VA_ARGS__) - #define F77_zgeru(...) F77_zgeru_base(__VA_ARGS__) +#define F77_sger(...) F77_sger_base(__VA_ARGS__) +#define F77_dger(...) F77_dger_base(__VA_ARGS__) +#define F77_cgerc(...) F77_cgerc_base(__VA_ARGS__) +#define F77_cgeru(...) F77_cgeru_base(__VA_ARGS__) +#define F77_zgerc(...) F77_zgerc_base(__VA_ARGS__) +#define F77_zgeru(...) F77_zgeru_base(__VA_ARGS__) #ifdef BLAS_FORTRAN_STRLEN_END @@ -294,75 +294,75 @@ /* Single Precision */ - #define F77_sgemv(...) F77_sgemv_base(__VA_ARGS__, 1) - #define F77_sgbmv(...) F77_sgbmv_base(__VA_ARGS__, 1) - #define F77_ssymv(...) F77_ssymv_base(__VA_ARGS__, 1) - #define F77_ssbmv(...) F77_ssbmv_base(__VA_ARGS__, 1) - #define F77_sspmv(...) F77_sspmv_base(__VA_ARGS__, 1) - #define F77_strmv(...) F77_strmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_stbmv(...) F77_stbmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_strsv(...) F77_strsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_stbsv(...) F77_stbsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_stpmv(...) F77_stpmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_stpsv(...) F77_stpsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ssyr(...) F77_ssyr_base(__VA_ARGS__, 1) - #define F77_sspr(...) F77_sspr_base(__VA_ARGS__, 1) - #define F77_sspr2(...) F77_sspr2_base(__VA_ARGS__, 1) - #define F77_ssyr2(...) F77_ssyr2_base(__VA_ARGS__, 1) + #define F77_sgemv(...) F77_sgemv_base(__VA_ARGS__, 1) + #define F77_sgbmv(...) F77_sgbmv_base(__VA_ARGS__, 1) + #define F77_ssymv(...) F77_ssymv_base(__VA_ARGS__, 1) + #define F77_ssbmv(...) F77_ssbmv_base(__VA_ARGS__, 1) + #define F77_sspmv(...) F77_sspmv_base(__VA_ARGS__, 1) + #define F77_strmv(...) F77_strmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_stbmv(...) F77_stbmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_strsv(...) F77_strsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_stbsv(...) F77_stbsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_stpmv(...) F77_stpmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_stpsv(...) F77_stpsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ssyr(...) F77_ssyr_base(__VA_ARGS__, 1) + #define F77_sspr(...) F77_sspr_base(__VA_ARGS__, 1) + #define F77_sspr2(...) F77_sspr2_base(__VA_ARGS__, 1) + #define F77_ssyr2(...) F77_ssyr2_base(__VA_ARGS__, 1) /* Double Precision */ - #define F77_dgemv(...) F77_dgemv_base(__VA_ARGS__, 1) - #define F77_dgbmv(...) F77_dgbmv_base(__VA_ARGS__, 1) - #define F77_dsymv(...) F77_dsymv_base(__VA_ARGS__, 1) - #define F77_dsbmv(...) F77_dsbmv_base(__VA_ARGS__, 1) - #define F77_dspmv(...) F77_dspmv_base(__VA_ARGS__, 1) - #define F77_dtrmv(...) F77_dtrmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtbmv(...) F77_dtbmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtrsv(...) F77_dtrsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtbsv(...) F77_dtbsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtpmv(...) F77_dtpmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dtpsv(...) F77_dtpsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_dsyr(...) F77_dsyr_base(__VA_ARGS__, 1) - #define F77_dspr(...) F77_dspr_base(__VA_ARGS__, 1) - #define F77_dspr2(...) F77_dspr2_base(__VA_ARGS__, 1) - #define F77_dsyr2(...) F77_dsyr2_base(__VA_ARGS__, 1) + #define F77_dgemv(...) F77_dgemv_base(__VA_ARGS__, 1) + #define F77_dgbmv(...) F77_dgbmv_base(__VA_ARGS__, 1) + #define F77_dsymv(...) F77_dsymv_base(__VA_ARGS__, 1) + #define F77_dsbmv(...) F77_dsbmv_base(__VA_ARGS__, 1) + #define F77_dspmv(...) F77_dspmv_base(__VA_ARGS__, 1) + #define F77_dtrmv(...) F77_dtrmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtbmv(...) F77_dtbmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtrsv(...) F77_dtrsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtbsv(...) F77_dtbsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtpmv(...) F77_dtpmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dtpsv(...) F77_dtpsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_dsyr(...) F77_dsyr_base(__VA_ARGS__, 1) + #define F77_dspr(...) F77_dspr_base(__VA_ARGS__, 1) + #define F77_dspr2(...) F77_dspr2_base(__VA_ARGS__, 1) + #define F77_dsyr2(...) F77_dsyr2_base(__VA_ARGS__, 1) /* Single Complex Precision */ - #define F77_cgemv(...) F77_cgemv_base(__VA_ARGS__, 1) - #define F77_cgbmv(...) F77_cgbmv_base(__VA_ARGS__, 1) - #define F77_chemv(...) F77_chemv_base(__VA_ARGS__, 1) - #define F77_chbmv(...) F77_chbmv_base(__VA_ARGS__, 1) - #define F77_chpmv(...) F77_chpmv_base(__VA_ARGS__, 1) - #define F77_ctrmv(...) F77_ctrmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctbmv(...) F77_ctbmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctpmv(...) F77_ctpmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctrsv(...) F77_ctrsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctbsv(...) F77_ctbsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ctpsv(...) F77_ctpsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_cher(...) F77_cher_base(__VA_ARGS__, 1) - #define F77_cher2(...) F77_cher2_base(__VA_ARGS__, 1) - #define F77_chpr(...) F77_chpr_base(__VA_ARGS__, 1) - #define F77_chpr2(...) F77_chpr2_base(__VA_ARGS__, 1) + #define F77_cgemv(...) F77_cgemv_base(__VA_ARGS__, 1) + #define F77_cgbmv(...) F77_cgbmv_base(__VA_ARGS__, 1) + #define F77_chemv(...) F77_chemv_base(__VA_ARGS__, 1) + #define F77_chbmv(...) F77_chbmv_base(__VA_ARGS__, 1) + #define F77_chpmv(...) F77_chpmv_base(__VA_ARGS__, 1) + #define F77_ctrmv(...) F77_ctrmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctbmv(...) F77_ctbmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctpmv(...) F77_ctpmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctrsv(...) F77_ctrsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctbsv(...) F77_ctbsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ctpsv(...) F77_ctpsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_cher(...) F77_cher_base(__VA_ARGS__, 1) + #define F77_cher2(...) F77_cher2_base(__VA_ARGS__, 1) + #define F77_chpr(...) F77_chpr_base(__VA_ARGS__, 1) + #define F77_chpr2(...) F77_chpr2_base(__VA_ARGS__, 1) /* Double Complex Precision */ - #define F77_zgemv(...) F77_zgemv_base(__VA_ARGS__, 1) - #define F77_zgbmv(...) F77_zgbmv_base(__VA_ARGS__, 1) - #define F77_zhemv(...) F77_zhemv_base(__VA_ARGS__, 1) - #define F77_zhbmv(...) F77_zhbmv_base(__VA_ARGS__, 1) - #define F77_zhpmv(...) F77_zhpmv_base(__VA_ARGS__, 1) - #define F77_ztrmv(...) F77_ztrmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztbmv(...) F77_ztbmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztpmv(...) F77_ztpmv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztrsv(...) F77_ztrsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztbsv(...) F77_ztbsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_ztpsv(...) F77_ztpsv_base(__VA_ARGS__, 1, 1, 1) - #define F77_zher(...) F77_zher_base(__VA_ARGS__, 1) - #define F77_zher2(...) F77_zher2_base(__VA_ARGS__, 1) - #define F77_zhpr(...) F77_zhpr_base(__VA_ARGS__, 1) - #define F77_zhpr2(...) F77_zhpr2_base(__VA_ARGS__, 1) + #define F77_zgemv(...) F77_zgemv_base(__VA_ARGS__, 1) + #define F77_zgbmv(...) F77_zgbmv_base(__VA_ARGS__, 1) + #define F77_zhemv(...) F77_zhemv_base(__VA_ARGS__, 1) + #define F77_zhbmv(...) F77_zhbmv_base(__VA_ARGS__, 1) + #define F77_zhpmv(...) F77_zhpmv_base(__VA_ARGS__, 1) + #define F77_ztrmv(...) F77_ztrmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztbmv(...) F77_ztbmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztpmv(...) F77_ztpmv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztrsv(...) F77_ztrsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztbsv(...) F77_ztbsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_ztpsv(...) F77_ztpsv_base(__VA_ARGS__, 1, 1, 1) + #define F77_zher(...) F77_zher_base(__VA_ARGS__, 1) + #define F77_zher2(...) F77_zher2_base(__VA_ARGS__, 1) + #define F77_zhpr(...) F77_zhpr_base(__VA_ARGS__, 1) + #define F77_zhpr2(...) F77_zhpr2_base(__VA_ARGS__, 1) /* * Level 3 Fortran variadic definitions with BLAS_FORTRAN_STRLEN_END @@ -370,123 +370,123 @@ /* Single Precision */ - #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__, 1, 1) - #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__, 1, 1) - #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__, 1, 1) - #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__, 1, 1) - #define F77_strmm(...) F77_strmm_base(__VA_ARGS__, 1, 1, 1, 1) - #define F77_strsm(...) F77_strsm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__, 1, 1) + #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__, 1, 1) + #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__, 1, 1) + #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__, 1, 1) + #define F77_strmm(...) F77_strmm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_strsm(...) F77_strsm_base(__VA_ARGS__, 1, 1, 1, 1) /* Double Precision */ - #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__, 1, 1) - #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__, 1, 1) - #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__, 1, 1) - #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__, 1, 1) - #define F77_dtrmm(...) F77_dtrmm_base(__VA_ARGS__, 1, 1, 1, 1) - #define F77_dtrsm(...) F77_dtrsm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__, 1, 1) + #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__, 1, 1) + #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__, 1, 1) + #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__, 1, 1) + #define F77_dtrmm(...) F77_dtrmm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_dtrsm(...) F77_dtrsm_base(__VA_ARGS__, 1, 1, 1, 1) /* Single Complex Precision */ - #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__, 1, 1) - #define F77_csymm(...) F77_csymm_base(__VA_ARGS__, 1, 1) - #define F77_chemm(...) F77_chemm_base(__VA_ARGS__, 1, 1) - #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__, 1, 1) - #define F77_cherk(...) F77_cherk_base(__VA_ARGS__, 1, 1) - #define F77_csyr2k(...) F77_csyr2k_base(__VA_ARGS__, 1, 1) - #define F77_cher2k(...) F77_cher2k_base(__VA_ARGS__, 1, 1) - #define F77_ctrmm(...) F77_ctrmm_base(__VA_ARGS__, 1, 1, 1, 1) - #define F77_ctrsm(...) F77_ctrsm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__, 1, 1) + #define F77_csymm(...) F77_csymm_base(__VA_ARGS__, 1, 1) + #define F77_chemm(...) F77_chemm_base(__VA_ARGS__, 1, 1) + #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__, 1, 1) + #define F77_cherk(...) F77_cherk_base(__VA_ARGS__, 1, 1) + #define F77_csyr2k(...) F77_csyr2k_base(__VA_ARGS__, 1, 1) + #define F77_cher2k(...) F77_cher2k_base(__VA_ARGS__, 1, 1) + #define F77_ctrmm(...) F77_ctrmm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_ctrsm(...) F77_ctrsm_base(__VA_ARGS__, 1, 1, 1, 1) /* Double Complex Precision */ - #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__, 1, 1) - #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__, 1, 1) - #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__, 1, 1) - #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__, 1, 1) - #define F77_zherk(...) F77_zherk_base(__VA_ARGS__, 1, 1) - #define F77_zsyr2k(...) F77_zsyr2k_base(__VA_ARGS__, 1, 1) - #define F77_zher2k(...) F77_zher2k_base(__VA_ARGS__, 1, 1) - #define F77_ztrmm(...) F77_ztrmm_base(__VA_ARGS__, 1, 1, 1, 1) - #define F77_ztrsm(...) F77_ztrsm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__, 1, 1) + #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__, 1, 1) + #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__, 1, 1) + #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__, 1, 1) + #define F77_zherk(...) F77_zherk_base(__VA_ARGS__, 1, 1) + #define F77_zsyr2k(...) F77_zsyr2k_base(__VA_ARGS__, 1, 1) + #define F77_zher2k(...) F77_zher2k_base(__VA_ARGS__, 1, 1) + #define F77_ztrmm(...) F77_ztrmm_base(__VA_ARGS__, 1, 1, 1, 1) + #define F77_ztrsm(...) F77_ztrsm_base(__VA_ARGS__, 1, 1, 1, 1) #else - + /* * Level 2 Fortran variadic definitions without BLAS_FORTRAN_STRLEN_END */ /* Single Precision */ - #define F77_sgemv(...) F77_sgemv_base(__VA_ARGS__) - #define F77_sgbmv(...) F77_sgbmv_base(__VA_ARGS__) - #define F77_ssymv(...) F77_ssymv_base(__VA_ARGS__) - #define F77_ssbmv(...) F77_ssbmv_base(__VA_ARGS__) - #define F77_sspmv(...) F77_sspmv_base(__VA_ARGS__) - #define F77_strmv(...) F77_strmv_base(__VA_ARGS__) - #define F77_stbmv(...) F77_stbmv_base(__VA_ARGS__) - #define F77_strsv(...) F77_strsv_base(__VA_ARGS__) - #define F77_stbsv(...) F77_stbsv_base(__VA_ARGS__) - #define F77_stpmv(...) F77_stpmv_base(__VA_ARGS__) - #define F77_stpsv(...) F77_stpsv_base(__VA_ARGS__) - #define F77_ssyr(...) F77_ssyr_base(__VA_ARGS__) - #define F77_sspr(...) F77_sspr_base(__VA_ARGS__) - #define F77_sspr2(...) F77_sspr2_base(__VA_ARGS__) - #define F77_ssyr2(...) F77_ssyr2_base(__VA_ARGS__) + #define F77_sgemv(...) F77_sgemv_base(__VA_ARGS__) + #define F77_sgbmv(...) F77_sgbmv_base(__VA_ARGS__) + #define F77_ssymv(...) F77_ssymv_base(__VA_ARGS__) + #define F77_ssbmv(...) F77_ssbmv_base(__VA_ARGS__) + #define F77_sspmv(...) F77_sspmv_base(__VA_ARGS__) + #define F77_strmv(...) F77_strmv_base(__VA_ARGS__) + #define F77_stbmv(...) F77_stbmv_base(__VA_ARGS__) + #define F77_strsv(...) F77_strsv_base(__VA_ARGS__) + #define F77_stbsv(...) F77_stbsv_base(__VA_ARGS__) + #define F77_stpmv(...) F77_stpmv_base(__VA_ARGS__) + #define F77_stpsv(...) F77_stpsv_base(__VA_ARGS__) + #define F77_ssyr(...) F77_ssyr_base(__VA_ARGS__) + #define F77_sspr(...) F77_sspr_base(__VA_ARGS__) + #define F77_sspr2(...) F77_sspr2_base(__VA_ARGS__) + #define F77_ssyr2(...) F77_ssyr2_base(__VA_ARGS__) /* Double Precision */ - #define F77_dgemv(...) F77_dgemv_base(__VA_ARGS__) - #define F77_dgbmv(...) F77_dgbmv_base(__VA_ARGS__) - #define F77_dsymv(...) F77_dsymv_base(__VA_ARGS__) - #define F77_dsbmv(...) F77_dsbmv_base(__VA_ARGS__) - #define F77_dspmv(...) F77_dspmv_base(__VA_ARGS__) - #define F77_dtrmv(...) F77_dtrmv_base(__VA_ARGS__) - #define F77_dtbmv(...) F77_dtbmv_base(__VA_ARGS__) - #define F77_dtrsv(...) F77_dtrsv_base(__VA_ARGS__) - #define F77_dtbsv(...) F77_dtbsv_base(__VA_ARGS__) - #define F77_dtpmv(...) F77_dtpmv_base(__VA_ARGS__) - #define F77_dtpsv(...) F77_dtpsv_base(__VA_ARGS__) - #define F77_dsyr(...) F77_dsyr_base(__VA_ARGS__) - #define F77_dspr(...) F77_dspr_base(__VA_ARGS__) - #define F77_dspr2(...) F77_dspr2_base(__VA_ARGS__) - #define F77_dsyr2(...) F77_dsyr2_base(__VA_ARGS__) + #define F77_dgemv(...) F77_dgemv_base(__VA_ARGS__) + #define F77_dgbmv(...) F77_dgbmv_base(__VA_ARGS__) + #define F77_dsymv(...) F77_dsymv_base(__VA_ARGS__) + #define F77_dsbmv(...) F77_dsbmv_base(__VA_ARGS__) + #define F77_dspmv(...) F77_dspmv_base(__VA_ARGS__) + #define F77_dtrmv(...) F77_dtrmv_base(__VA_ARGS__) + #define F77_dtbmv(...) F77_dtbmv_base(__VA_ARGS__) + #define F77_dtrsv(...) F77_dtrsv_base(__VA_ARGS__) + #define F77_dtbsv(...) F77_dtbsv_base(__VA_ARGS__) + #define F77_dtpmv(...) F77_dtpmv_base(__VA_ARGS__) + #define F77_dtpsv(...) F77_dtpsv_base(__VA_ARGS__) + #define F77_dsyr(...) F77_dsyr_base(__VA_ARGS__) + #define F77_dspr(...) F77_dspr_base(__VA_ARGS__) + #define F77_dspr2(...) F77_dspr2_base(__VA_ARGS__) + #define F77_dsyr2(...) F77_dsyr2_base(__VA_ARGS__) /* Single Complex Precision */ - #define F77_cgemv(...) F77_cgemv_base(__VA_ARGS__) - #define F77_cgbmv(...) F77_cgbmv_base(__VA_ARGS__) - #define F77_chemv(...) F77_chemv_base(__VA_ARGS__) - #define F77_chbmv(...) F77_chbmv_base(__VA_ARGS__) - #define F77_chpmv(...) F77_chpmv_base(__VA_ARGS__) - #define F77_ctrmv(...) F77_ctrmv_base(__VA_ARGS__) - #define F77_ctbmv(...) F77_ctbmv_base(__VA_ARGS__) - #define F77_ctpmv(...) F77_ctpmv_base(__VA_ARGS__) - #define F77_ctrsv(...) F77_ctrsv_base(__VA_ARGS__) - #define F77_ctbsv(...) F77_ctbsv_base(__VA_ARGS__) - #define F77_ctpsv(...) F77_ctpsv_base(__VA_ARGS__) - #define F77_cher(...) F77_cher_base(__VA_ARGS__) - #define F77_cher2(...) F77_cher2_base(__VA_ARGS__) - #define F77_chpr(...) F77_chpr_base(__VA_ARGS__) - #define F77_chpr2(...) F77_chpr2_base(__VA_ARGS__) + #define F77_cgemv(...) F77_cgemv_base(__VA_ARGS__) + #define F77_cgbmv(...) F77_cgbmv_base(__VA_ARGS__) + #define F77_chemv(...) F77_chemv_base(__VA_ARGS__) + #define F77_chbmv(...) F77_chbmv_base(__VA_ARGS__) + #define F77_chpmv(...) F77_chpmv_base(__VA_ARGS__) + #define F77_ctrmv(...) F77_ctrmv_base(__VA_ARGS__) + #define F77_ctbmv(...) F77_ctbmv_base(__VA_ARGS__) + #define F77_ctpmv(...) F77_ctpmv_base(__VA_ARGS__) + #define F77_ctrsv(...) F77_ctrsv_base(__VA_ARGS__) + #define F77_ctbsv(...) F77_ctbsv_base(__VA_ARGS__) + #define F77_ctpsv(...) F77_ctpsv_base(__VA_ARGS__) + #define F77_cher(...) F77_cher_base(__VA_ARGS__) + #define F77_cher2(...) F77_cher2_base(__VA_ARGS__) + #define F77_chpr(...) F77_chpr_base(__VA_ARGS__) + #define F77_chpr2(...) F77_chpr2_base(__VA_ARGS__) /* Double Complex Precision */ - #define F77_zgemv(...) F77_zgemv_base(__VA_ARGS__) - #define F77_zgbmv(...) F77_zgbmv_base(__VA_ARGS__) - #define F77_zhemv(...) F77_zhemv_base(__VA_ARGS__) - #define F77_zhbmv(...) F77_zhbmv_base(__VA_ARGS__) - #define F77_zhpmv(...) F77_zhpmv_base(__VA_ARGS__) - #define F77_ztrmv(...) F77_ztrmv_base(__VA_ARGS__) - #define F77_ztbmv(...) F77_ztbmv_base(__VA_ARGS__) - #define F77_ztpmv(...) F77_ztpmv_base(__VA_ARGS__) - #define F77_ztrsv(...) F77_ztrsv_base(__VA_ARGS__) - #define F77_ztbsv(...) F77_ztbsv_base(__VA_ARGS__) - #define F77_ztpsv(...) F77_ztpsv_base(__VA_ARGS__) - #define F77_zher(...) F77_zher_base(__VA_ARGS__) - #define F77_zher2(...) F77_zher2_base(__VA_ARGS__) - #define F77_zhpr(...) F77_zhpr_base(__VA_ARGS__) - #define F77_zhpr2(...) F77_zhpr2_base(__VA_ARGS__) + #define F77_zgemv(...) F77_zgemv_base(__VA_ARGS__) + #define F77_zgbmv(...) F77_zgbmv_base(__VA_ARGS__) + #define F77_zhemv(...) F77_zhemv_base(__VA_ARGS__) + #define F77_zhbmv(...) F77_zhbmv_base(__VA_ARGS__) + #define F77_zhpmv(...) F77_zhpmv_base(__VA_ARGS__) + #define F77_ztrmv(...) F77_ztrmv_base(__VA_ARGS__) + #define F77_ztbmv(...) F77_ztbmv_base(__VA_ARGS__) + #define F77_ztpmv(...) F77_ztpmv_base(__VA_ARGS__) + #define F77_ztrsv(...) F77_ztrsv_base(__VA_ARGS__) + #define F77_ztbsv(...) F77_ztbsv_base(__VA_ARGS__) + #define F77_ztpsv(...) F77_ztpsv_base(__VA_ARGS__) + #define F77_zher(...) F77_zher_base(__VA_ARGS__) + #define F77_zher2(...) F77_zher2_base(__VA_ARGS__) + #define F77_zhpr(...) F77_zhpr_base(__VA_ARGS__) + #define F77_zhpr2(...) F77_zhpr2_base(__VA_ARGS__) /* * Level 3 Fortran variadic definitions without BLAS_FORTRAN_STRLEN_END @@ -494,45 +494,45 @@ /* Single Precision */ - #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__) - #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__) - #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__) - #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__) - #define F77_strmm(...) F77_strmm_base(__VA_ARGS__) - #define F77_strsm(...) F77_strsm_base(__VA_ARGS__) + #define F77_sgemm(...) F77_sgemm_base(__VA_ARGS__) + #define F77_ssymm(...) F77_ssymm_base(__VA_ARGS__) + #define F77_ssyrk(...) F77_ssyrk_base(__VA_ARGS__) + #define F77_ssyr2k(...) F77_ssyr2k_base(__VA_ARGS__) + #define F77_strmm(...) F77_strmm_base(__VA_ARGS__) + #define F77_strsm(...) F77_strsm_base(__VA_ARGS__) /* Double Precision */ - #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__) - #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__) - #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__) - #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__) - #define F77_dtrmm(...) F77_dtrmm_base(__VA_ARGS__) - #define F77_dtrsm(...) F77_dtrsm_base(__VA_ARGS__) + #define F77_dgemm(...) F77_dgemm_base(__VA_ARGS__) + #define F77_dsymm(...) F77_dsymm_base(__VA_ARGS__) + #define F77_dsyrk(...) F77_dsyrk_base(__VA_ARGS__) + #define F77_dsyr2k(...) F77_dsyr2k_base(__VA_ARGS__) + #define F77_dtrmm(...) F77_dtrmm_base(__VA_ARGS__) + #define F77_dtrsm(...) F77_dtrsm_base(__VA_ARGS__) /* Single Complex Precision */ - #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__) - #define F77_csymm(...) F77_csymm_base(__VA_ARGS__) - #define F77_chemm(...) F77_chemm_base(__VA_ARGS__) - #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__) - #define F77_cherk(...) F77_cherk_base(__VA_ARGS__) - #define F77_csyr2k(...) F77_csyr2k_base(__VA_ARGS__) - #define F77_cher2k(...) F77_cher2k_base(__VA_ARGS__) - #define F77_ctrmm(...) F77_ctrmm_base(__VA_ARGS__) - #define F77_ctrsm(...) F77_ctrsm_base(__VA_ARGS__) + #define F77_cgemm(...) F77_cgemm_base(__VA_ARGS__) + #define F77_csymm(...) F77_csymm_base(__VA_ARGS__) + #define F77_chemm(...) F77_chemm_base(__VA_ARGS__) + #define F77_csyrk(...) F77_csyrk_base(__VA_ARGS__) + #define F77_cherk(...) F77_cherk_base(__VA_ARGS__) + #define F77_csyr2k(...) F77_csyr2k_base(__VA_ARGS__) + #define F77_cher2k(...) F77_cher2k_base(__VA_ARGS__) + #define F77_ctrmm(...) F77_ctrmm_base(__VA_ARGS__) + #define F77_ctrsm(...) F77_ctrsm_base(__VA_ARGS__) /* Double Complex Precision */ - #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__) - #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__) - #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__) - #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__) - #define F77_zherk(...) F77_zherk_base(__VA_ARGS__) - #define F77_zsyr2k(...) F77_zsyr2k_base(__VA_ARGS__) - #define F77_zher2k(...) F77_zher2k_base(__VA_ARGS__) - #define F77_ztrmm(...) F77_ztrmm_base(__VA_ARGS__) - #define F77_ztrsm(...) F77_ztrsm_base(__VA_ARGS__) + #define F77_zgemm(...) F77_zgemm_base(__VA_ARGS__) + #define F77_zsymm(...) F77_zsymm_base(__VA_ARGS__) + #define F77_zhemm(...) F77_zhemm_base(__VA_ARGS__) + #define F77_zsyrk(...) F77_zsyrk_base(__VA_ARGS__) + #define F77_zherk(...) F77_zherk_base(__VA_ARGS__) + #define F77_zsyr2k(...) F77_zsyr2k_base(__VA_ARGS__) + #define F77_zher2k(...) F77_zher2k_base(__VA_ARGS__) + #define F77_ztrmm(...) F77_ztrmm_base(__VA_ARGS__) + #define F77_ztrsm(...) F77_ztrsm_base(__VA_ARGS__) #endif @@ -545,85 +545,90 @@ extern "C" { #endif #ifdef BLAS_FORTRAN_STRLEN_END - #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__, 1) + #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__, 1) #else - #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__) + #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__) +#endif +void +#ifdef HAS_ATTRIBUTE_WEAK_SUPPORT +__attribute__((weak)) +#endif +F77_xerbla_base(FCHAR, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t #endif -void F77_xerbla_base(FCHAR, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +); + /* * Level 1 Fortran Prototypes */ /* Single Precision */ - void F77_srot_base(FINT, float *, FINT, float *, FINT, const float *, const float *); - void F77_srotg_base(float *,float *,float *,float *); - void F77_srotm_base( FINT, float *, FINT, float *, FINT, const float *); - void F77_srotmg_base(float *,float *,float *,const float *, float *); - void F77_sswap_base( FINT, float *, FINT, float *, FINT); - void F77_scopy_base( FINT, const float *, FINT, float *, FINT); - void F77_saxpy_base( FINT, const float *, const float *, FINT, float *, FINT); - void F77_sdot_sub_base(FINT, const float *, FINT, const float *, FINT, float *); - void F77_sdsdot_sub_base( FINT, const float *, const float *, FINT, const float *, FINT, float *); - void F77_sscal_base( FINT, const float *, float *, FINT); - void F77_snrm2_sub_base( FINT, const float *, FINT, float *); - void F77_sasum_sub_base( FINT, const float *, FINT, float *); - void F77_isamax_sub_base( FINT, const float * , FINT, FINT2); +void F77_srot_base(FINT, float *, FINT, float *, FINT, const float *, const float *); +void F77_srotg_base(float *,float *,float *,float *); +void F77_srotm_base(FINT, float *, FINT, float *, FINT, const float *); +void F77_srotmg_base(float *,float *,float *,const float *, float *); +void F77_sswap_base(FINT, float *, FINT, float *, FINT); +void F77_scopy_base(FINT, const float *, FINT, float *, FINT); +void F77_saxpy_base(FINT, const float *, const float *, FINT, float *, FINT); +void F77_sdot_sub_base(FINT, const float *, FINT, const float *, FINT, float *); +void F77_sdsdot_sub_base(FINT, const float *, const float *, FINT, const float *, FINT, float *); +void F77_sscal_base(FINT, const float *, float *, FINT); +void F77_snrm2_sub_base(FINT, const float *, FINT, float *); +void F77_sasum_sub_base(FINT, const float *, FINT, float *); +void F77_isamax_sub_base(FINT, const float * , FINT, FINT2); /* Double Precision */ - void F77_drot_base(FINT, double *, FINT, double *, FINT, const double *, const double *); - void F77_drotg_base(double *,double *,double *,double *); - void F77_drotm_base( FINT, double *, FINT, double *, FINT, const double *); - void F77_drotmg_base(double *,double *,double *,const double *, double *); - void F77_dswap_base( FINT, double *, FINT, double *, FINT); - void F77_dcopy_base( FINT, const double *, FINT, double *, FINT); - void F77_daxpy_base( FINT, const double *, const double *, FINT, double *, FINT); - void F77_dswap_base( FINT, double *, FINT, double *, FINT); - void F77_dsdot_sub_base(FINT, const float *, FINT, const float *, FINT, double *); - void F77_ddot_sub_base( FINT, const double *, FINT, const double *, FINT, double *); - void F77_dscal_base( FINT, const double *, double *, FINT); - void F77_dnrm2_sub_base( FINT, const double *, FINT, double *); - void F77_dasum_sub_base( FINT, const double *, FINT, double *); - void F77_idamax_sub_base( FINT, const double * , FINT, FINT2); +void F77_drot_base(FINT, double *, FINT, double *, FINT, const double *, const double *); +void F77_drotg_base(double *,double *,double *,double *); +void F77_drotm_base(FINT, double *, FINT, double *, FINT, const double *); +void F77_drotmg_base(double *,double *,double *,const double *, double *); +void F77_dswap_base(FINT, double *, FINT, double *, FINT); +void F77_dcopy_base(FINT, const double *, FINT, double *, FINT); +void F77_daxpy_base(FINT, const double *, const double *, FINT, double *, FINT); +void F77_dswap_base(FINT, double *, FINT, double *, FINT); +void F77_dsdot_sub_base(FINT, const float *, FINT, const float *, FINT, double *); +void F77_ddot_sub_base(FINT, const double *, FINT, const double *, FINT, double *); +void F77_dscal_base(FINT, const double *, double *, FINT); +void F77_dnrm2_sub_base(FINT, const double *, FINT, double *); +void F77_dasum_sub_base(FINT, const double *, FINT, double *); +void F77_idamax_sub_base(FINT, const double * , FINT, FINT2); /* Single Complex Precision */ - void F77_crotg_base(void *, void *, float *, void *); - void F77_csrot_base(FINT, void *X, FINT, void *, FINT, const float *, const float *); - void F77_cswap_base( FINT, void *, FINT, void *, FINT); - void F77_ccopy_base( FINT, const void *, FINT, void *, FINT); - void F77_caxpy_base( FINT, const void *, const void *, FINT, void *, FINT); - void F77_cswap_base( FINT, void *, FINT, void *, FINT); - void F77_cdotc_sub_base( FINT, const void *, FINT, const void *, FINT, void *); - void F77_cdotu_sub_base( FINT, const void *, FINT, const void *, FINT, void *); - void F77_cscal_base( FINT, const void *, void *, FINT); - void F77_icamax_sub_base( FINT, const void *, FINT, FINT2); - void F77_csscal_base( FINT, const float *, void *, FINT); - void F77_scnrm2_sub_base( FINT, const void *, FINT, float *); - void F77_scasum_sub_base( FINT, const void *, FINT, float *); - void F77_scabs1_sub_base( const void *, float *); +void F77_crotg_base(void *, void *, float *, void *); +void F77_csrot_base(FINT, void *X, FINT, void *, FINT, const float *, const float *); +void F77_cswap_base(FINT, void *, FINT, void *, FINT); +void F77_ccopy_base(FINT, const void *, FINT, void *, FINT); +void F77_caxpy_base(FINT, const void *, const void *, FINT, void *, FINT); +void F77_cswap_base(FINT, void *, FINT, void *, FINT); +void F77_cdotc_sub_base(FINT, const void *, FINT, const void *, FINT, void *); +void F77_cdotu_sub_base(FINT, const void *, FINT, const void *, FINT, void *); +void F77_cscal_base(FINT, const void *, void *, FINT); +void F77_icamax_sub_base(FINT, const void *, FINT, FINT2); +void F77_csscal_base(FINT, const float *, void *, FINT); +void F77_scnrm2_sub_base(FINT, const void *, FINT, float *); +void F77_scasum_sub_base(FINT, const void *, FINT, float *); +void F77_scabs1_sub_base(const void *, float *); /* Double Complex Precision */ - void F77_zrotg_base(void *, void *, double *, void *); - void F77_zdrot_base(FINT, void *X, FINT, void *, FINT, const double *, const double *); - void F77_zswap_base( FINT, void *, FINT, void *, FINT); - void F77_zcopy_base( FINT, const void *, FINT, void *, FINT); - void F77_zaxpy_base( FINT, const void *, const void *, FINT, void *, FINT); - void F77_zswap_base( FINT, void *, FINT, void *, FINT); - void F77_zdotc_sub_base( FINT, const void *, FINT, const void *, FINT, void *); - void F77_zdotu_sub_base( FINT, const void *, FINT, const void *, FINT, void *); - void F77_zdscal_base( FINT, const double *, void *, FINT); - void F77_zscal_base( FINT, const void *, void *, FINT); - void F77_dznrm2_sub_base( FINT, const void *, FINT, double *); - void F77_dzasum_sub_base( FINT, const void *, FINT, double *); - void F77_izamax_sub_base( FINT, const void *, FINT, FINT2); - void F77_dcabs1_sub_base( const void *, double *); +void F77_zrotg_base(void *, void *, double *, void *); +void F77_zdrot_base(FINT, void *X, FINT, void *, FINT, const double *, const double *); +void F77_zswap_base(FINT, void *, FINT, void *, FINT); +void F77_zcopy_base(FINT, const void *, FINT, void *, FINT); +void F77_zaxpy_base(FINT, const void *, const void *, FINT, void *, FINT); +void F77_zswap_base(FINT, void *, FINT, void *, FINT); +void F77_zdotc_sub_base(FINT, const void *, FINT, const void *, FINT, void *); +void F77_zdotu_sub_base(FINT, const void *, FINT, const void *, FINT, void *); +void F77_zdscal_base(FINT, const double *, void *, FINT); +void F77_zscal_base(FINT, const void *, void *, FINT); +void F77_dznrm2_sub_base(FINT, const void *, FINT, double *); +void F77_dzasum_sub_base(FINT, const void *, FINT, double *); +void F77_izamax_sub_base(FINT, const void *, FINT, FINT2); +void F77_dcabs1_sub_base(const void *, double *); /* * Level 2 Fortran Prototypes @@ -631,321 +636,321 @@ void F77_xerbla_base(FCHAR, void * /* Single Precision */ - void F77_sgemv_base(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_sgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ssymv_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ssbmv_base(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_sspmv_base(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_strmv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_stbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_strsv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_stbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_stpmv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_stpsv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_sger_base( FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT); - void F77_ssyr_base(FCHAR, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_sspr_base(FCHAR, FINT, const float *, const float *, FINT, float * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_sspr2_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ssyr2_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +void F77_sgemv_base(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_sgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ssymv_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ssbmv_base(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_sspmv_base(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_strmv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_stbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_strsv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_stbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_stpmv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_stpsv_base(FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_sger_base(FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT); +void F77_ssyr_base(FCHAR, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_sspr_base(FCHAR, FINT, const float *, const float *, FINT, float * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_sspr2_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ssyr2_base(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); /* Double Precision */ - void F77_dgemv_base(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dsymv_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dsbmv_base(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dspmv_base(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dtrmv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtrsv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtpmv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dtpsv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_dger_base( FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT); - void F77_dsyr_base(FCHAR, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dspr_base(FCHAR, FINT, const double *, const double *, FINT, double * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dspr2_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_dsyr2_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +void F77_dgemv_base(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dsymv_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dsbmv_base(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dspmv_base(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dtrmv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtrsv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtpmv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dtpsv_base(FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_dger_base(FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT); +void F77_dsyr_base(FCHAR, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dspr_base(FCHAR, FINT, const double *, const double *, FINT, double * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dspr2_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_dsyr2_base(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); /* Single Complex Precision */ - void F77_cgemv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_cgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chemv_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chbmv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chpmv_base(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ctrmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctpmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctrsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ctpsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_cgerc_base( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_cgeru_base( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_cher_base(FCHAR, FINT, const float *, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_cher2_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chpr_base(FCHAR, FINT, const float *, const void *, FINT, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_chpr2_base(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +void F77_cgemv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_cgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chemv_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chbmv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chpmv_base(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ctrmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctpmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctrsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ctpsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_cgerc_base(FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); +void F77_cgeru_base(FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); +void F77_cher_base(FCHAR, FINT, const float *, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_cher2_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chpr_base(FCHAR, FINT, const float *, const void *, FINT, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_chpr2_base(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); /* Double Complex Precision */ - void F77_zgemv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhemv_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhbmv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhpmv_base(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_ztrmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztpmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztrsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_ztpsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t - #endif - ); - void F77_zgerc_base( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_zgeru_base( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_zher_base(FCHAR, FINT, const double *, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zher2_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhpr_base(FCHAR, FINT, const double *, const void *, FINT, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); - void F77_zhpr2_base(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void * - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t - #endif - ); +void F77_zgemv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zgbmv_base(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhemv_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhbmv_base(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhpmv_base(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_ztrmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztbmv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztpmv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztrsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztbsv_base(FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_ztpsv_base(FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t +#endif +); +void F77_zgerc_base(FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); +void F77_zgeru_base(FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); +void F77_zher_base(FCHAR, FINT, const double *, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zher2_base(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhpr_base(FCHAR, FINT, const double *, const void *, FINT, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); +void F77_zhpr2_base(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void * +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t +#endif +); /* * Level 3 Fortran Prototypes @@ -953,165 +958,165 @@ void F77_xerbla_base(FCHAR, void * /* Single Precision */ - void F77_sgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ssymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ssyrk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ssyr2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_strmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); - void F77_strsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); +void F77_sgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ssymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ssyrk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ssyr2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_strmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); +void F77_strsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); /* Double Precision */ - void F77_dgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_dsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_dsyrk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_dsyr2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_dtrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); - void F77_dtrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); +void F77_dgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_dsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_dsyrk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_dsyr2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_dtrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); +void F77_dtrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); /* Single Complex Precision */ - void F77_cgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_csymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_chemm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_csyrk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_cherk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_csyr2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_cher2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ctrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); - void F77_ctrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); +void F77_cgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_csymm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_chemm_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_csyrk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_cherk_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_csyr2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_cher2k_base(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ctrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); +void F77_ctrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); /* Double Complex Precision */ - void F77_zgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zhemm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zsyrk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zherk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zsyr2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_zher2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t - #endif - ); - void F77_ztrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); - void F77_ztrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT - #ifdef BLAS_FORTRAN_STRLEN_END - , size_t, size_t, size_t, size_t - #endif - ); +void F77_zgemm_base(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zsymm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zhemm_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zsyrk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zherk_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zsyr2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_zher2k_base(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +void F77_ztrmm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); +void F77_ztrsm_base(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT +#ifdef BLAS_FORTRAN_STRLEN_END + , size_t, size_t, size_t, size_t +#endif +); #ifdef __cplusplus } diff --git a/CBLAS/testing/CMakeLists.txt b/CBLAS/testing/CMakeLists.txt index 9b8cfaeb17..a2a41e05e3 100644 --- a/CBLAS/testing/CMakeLists.txt +++ b/CBLAS/testing/CMakeLists.txt @@ -52,6 +52,12 @@ if(BUILD_SINGLE) add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) + if(HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xscblat1 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xscblat2 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xscblat3 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + endif() + target_link_libraries(xscblat1 ${CBLASLIB}) target_link_libraries(xscblat2 ${CBLASLIB}) target_link_libraries(xscblat3 ${CBLASLIB}) @@ -66,6 +72,12 @@ if(BUILD_DOUBLE) add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) + if(HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xdcblat1 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xdcblat2 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xdcblat3 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + endif() + target_link_libraries(xdcblat1 ${CBLASLIB}) target_link_libraries(xdcblat2 ${CBLASLIB}) target_link_libraries(xdcblat3 ${CBLASLIB}) @@ -80,6 +92,12 @@ if(BUILD_COMPLEX) add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) + if(HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xccblat1 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xccblat2 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xccblat3 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + endif() + target_link_libraries(xccblat1 ${CBLASLIB} ${BLAS_LIBRARIES}) target_link_libraries(xccblat2 ${CBLASLIB}) target_link_libraries(xccblat3 ${CBLASLIB}) @@ -94,6 +112,12 @@ if(BUILD_COMPLEX16) add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/test_include/cblas_test.h) + if(HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xzcblat1 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xzcblat2 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + target_compile_definitions(xzcblat3 PRIVATE HAS_ATTRIBUTE_WEAK_SUPPORT) + endif() + target_link_libraries(xzcblat1 ${CBLASLIB}) target_link_libraries(xzcblat2 ${CBLASLIB}) target_link_libraries(xzcblat3 ${CBLASLIB}) diff --git a/CBLAS/testing/c_c2chke.c b/CBLAS/testing/c_c2chke.c index 33a0261f51..e46bcd1493 100644 --- a/CBLAS/testing/c_c2chke.c +++ b/CBLAS/testing/c_c2chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -32,7 +32,7 @@ void F77_c2chke(char *rout) { ALPHA[2] = {0.0,0.0}, BETA[2] = {0.0,0.0}, RALPHA = 0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_c3chke.c b/CBLAS/testing/c_c3chke.c index 7057c7a2c4..b5bbc753cd 100644 --- a/CBLAS/testing/c_c3chke.c +++ b/CBLAS/testing/c_c3chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -32,7 +32,7 @@ void F77_c3chke(char * rout) { ALPHA[2] = {0.0,0.0}, BETA[2] = {0.0,0.0}, RALPHA = 0.0, RBETA = 0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_cblas1.c b/CBLAS/testing/c_cblas1.c index 81a5b843b5..75b5b73836 100644 --- a/CBLAS/testing/c_cblas1.c +++ b/CBLAS/testing/c_cblas1.c @@ -8,67 +8,67 @@ */ #include "cblas_test.h" #include "cblas.h" -void F77_caxpy(const int *N, const void *alpha, void *X, - const int *incX, void *Y, const int *incY) +void F77_caxpy(const CBLAS_INT *N, const void *alpha, void *X, + const CBLAS_INT *incX, void *Y, const CBLAS_INT *incY) { cblas_caxpy(*N, alpha, X, *incX, Y, *incY); return; } -void F77_ccopy(const int *N, void *X, const int *incX, - void *Y, const int *incY) +void F77_ccopy(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY) { cblas_ccopy(*N, X, *incX, Y, *incY); return; } -void F77_cdotc(const int *N, void *X, const int *incX, - void *Y, const int *incY, void *dotc) +void F77_cdotc(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY, void *dotc) { cblas_cdotc_sub(*N, X, *incX, Y, *incY, dotc); return; } -void F77_cdotu(const int *N, void *X, const int *incX, - void *Y, const int *incY,void *dotu) +void F77_cdotu(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY,void *dotu) { cblas_cdotu_sub(*N, X, *incX, Y, *incY, dotu); return; } -void F77_cscal(const int *N, const void * *alpha, void *X, - const int *incX) +void F77_cscal(const CBLAS_INT *N, const void * *alpha, void *X, + const CBLAS_INT *incX) { cblas_cscal(*N, alpha, X, *incX); return; } -void F77_csscal(const int *N, const float *alpha, void *X, - const int *incX) +void F77_csscal(const CBLAS_INT *N, const float *alpha, void *X, + const CBLAS_INT *incX) { cblas_csscal(*N, *alpha, X, *incX); return; } -void F77_cswap( const int *N, void *X, const int *incX, - void *Y, const int *incY) +void F77_cswap( const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY) { cblas_cswap(*N,X,*incX,Y,*incY); return; } -int F77_icamax(const int *N, const void *X, const int *incX) +CBLAS_INT F77_icamax(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_icamax(*N, X, *incX)+1); } -float F77_scnrm2(const int *N, const void *X, const int *incX) +float F77_scnrm2(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { return cblas_scnrm2(*N, X, *incX); } -float F77_scasum(const int *N, void *X, const int *incX) +float F77_scasum(const CBLAS_INT *N, void *X, const CBLAS_INT *incX) { return cblas_scasum(*N, X, *incX); } diff --git a/CBLAS/testing/c_cblas2.c b/CBLAS/testing/c_cblas2.c index bb7e644854..b4c8734c0d 100644 --- a/CBLAS/testing/c_cblas2.c +++ b/CBLAS/testing/c_cblas2.c @@ -8,13 +8,13 @@ #include "cblas.h" #include "cblas_test.h" -void F77_cgemv(int *layout, char *transp, int *m, int *n, +void F77_cgemv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, const void *alpha, - CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx, - const void *beta, void *y, int *incy) { + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, const void *x, CBLAS_INT *incx, + const void *beta, void *y, CBLAS_INT *incy) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -38,13 +38,13 @@ void F77_cgemv(int *layout, char *transp, int *m, int *n, *m, *n, alpha, a, *lda, x, *incx, beta, y, *incy ); } -void F77_cgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *x, int *incx, - CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, int *incy) { +void F77_cgbmv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, CBLAS_INT *kl, CBLAS_INT *ku, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, + CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy) { CBLAS_TEST_COMPLEX *A; - int i,j,irow,jcol,LDA; + CBLAS_INT i,j,irow,jcol,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -85,12 +85,12 @@ void F77_cgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, *incx, beta, y, *incy ); } -void F77_cgeru(int *layout, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy, - CBLAS_TEST_COMPLEX *a, int *lda){ +void F77_cgeru(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda){ CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -114,11 +114,11 @@ void F77_cgeru(int *layout, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, cblas_cgeru( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda ); } -void F77_cgerc(int *layout, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy, - CBLAS_TEST_COMPLEX *a, int *lda) { +void F77_cgerc(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -142,12 +142,12 @@ void F77_cgerc(int *layout, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, cblas_cgerc( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda ); } -void F77_chemv(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx, CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, int *incy){ +void F77_chemv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx, CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -172,13 +172,13 @@ void F77_chemv(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, beta, y, *incy ); } -void F77_chbmv(int *layout, char *uplow, int *n, int *k, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *y, int *incy){ +void F77_chbmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_COMPLEX *A; -int i,irow,j,jcol,LDA; +CBLAS_INT i,irow,j,jcol,LDA; CBLAS_UPLO uplo; @@ -236,12 +236,12 @@ int i,irow,j,jcol,LDA; beta, y, *incy ); } -void F77_chpmv(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, int *incx, - CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, int *incy){ +void F77_chpmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, + CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_COMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -292,11 +292,11 @@ void F77_chpmv(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, *incy ); } -void F77_ctbmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx) { +void F77_ctbmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -355,12 +355,12 @@ void F77_ctbmv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctbmv(UNDEFINED, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ctbsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx) { +void F77_ctbsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -419,10 +419,10 @@ void F77_ctbsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctbsv(UNDEFINED, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ctpmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, int *incx) { +void F77_ctpmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -474,10 +474,10 @@ void F77_ctpmv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctpmv( UNDEFINED, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_ctpsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, int *incx) { +void F77_ctpsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_COMPLEX *ap, CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -529,11 +529,11 @@ void F77_ctpsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctpsv( UNDEFINED, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_ctrmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx) { +void F77_ctrmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -558,11 +558,11 @@ void F77_ctrmv(int *layout, char *uplow, char *transp, char *diagn, else cblas_ctrmv(UNDEFINED, uplo, trans, diag, *n, a, *lda, x, *incx); } -void F77_ctrsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *x, - int *incx) { +void F77_ctrsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_COMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -588,10 +588,10 @@ void F77_ctrsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ctrsv(UNDEFINED, uplo, trans, diag, *n, a, *lda, x, *incx ); } -void F77_chpr(int *layout, char *uplow, int *n, float *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *ap) { +void F77_chpr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *ap) { CBLAS_TEST_COMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -663,11 +663,11 @@ void F77_chpr(int *layout, char *uplow, int *n, float *alpha, cblas_chpr(UNDEFINED, uplo, *n, *alpha, x, *incx, ap ); } -void F77_chpr2(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy, +void F77_chpr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy, CBLAS_TEST_COMPLEX *ap) { CBLAS_TEST_COMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -740,10 +740,10 @@ void F77_chpr2(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, cblas_chpr2( UNDEFINED, uplo, *n, alpha, x, *incx, y, *incy, ap ); } -void F77_cher(int *layout, char *uplow, int *n, float *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *a, int *lda) { +void F77_cher(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -772,12 +772,12 @@ void F77_cher(int *layout, char *uplow, int *n, float *alpha, cblas_cher( UNDEFINED, uplo, *n, *alpha, x, *incx, a, *lda ); } -void F77_cher2(int *layout, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha, - CBLAS_TEST_COMPLEX *x, int *incx, CBLAS_TEST_COMPLEX *y, int *incy, - CBLAS_TEST_COMPLEX *a, int *lda) { +void F77_cher2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, + CBLAS_TEST_COMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_COMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_COMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); diff --git a/CBLAS/testing/c_cblas3.c b/CBLAS/testing/c_cblas3.c index e0e41230f4..f758dc9ebc 100644 --- a/CBLAS/testing/c_cblas3.c +++ b/CBLAS/testing/c_cblas3.c @@ -11,13 +11,13 @@ #define TEST_ROW_MJR 1 #define UNDEFINED -1 -void F77_cgemm(int *layout, char *transpa, char *transpb, int *m, int *n, - int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_cgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_INT *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_COMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_TRANSPOSE transa, transb; get_transpose_type(transpa, &transa); @@ -87,13 +87,13 @@ void F77_cgemm(int *layout, char *transpa, char *transpb, int *m, int *n, cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_chemm(int *layout, char *rtlf, char *uplow, int *m, int *n, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_chemm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_COMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -151,13 +151,13 @@ void F77_chemm(int *layout, char *rtlf, char *uplow, int *m, int *n, cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_csymm(int *layout, char *rtlf, char *uplow, int *m, int *n, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_csymm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_COMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -206,11 +206,11 @@ void F77_csymm(int *layout, char *rtlf, char *uplow, int *m, int *n, beta, c, *ldc ); } -void F77_cherk(int *layout, char *uplow, char *transp, int *n, int *k, - float *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - float *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_cherk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + float *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + float *beta, CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; CBLAS_TEST_COMPLEX *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -262,11 +262,11 @@ void F77_cherk(int *layout, char *uplow, char *transp, int *n, int *k, c, *ldc ); } -void F77_csyrk(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) { +void F77_csyrk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; CBLAS_TEST_COMPLEX *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -317,11 +317,11 @@ void F77_csyrk(int *layout, char *uplow, char *transp, int *n, int *k, cblas_csyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta, c, *ldc ); } -void F77_cher2k(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, float *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_cher2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, float *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; CBLAS_TEST_COMPLEX *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -381,11 +381,11 @@ void F77_cher2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_cher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_csyr2k(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, - CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, - CBLAS_TEST_COMPLEX *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_csyr2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_COMPLEX *beta, + CBLAS_TEST_COMPLEX *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; CBLAS_TEST_COMPLEX *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -445,10 +445,10 @@ void F77_csyr2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_csyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_ctrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, - int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { - int i,j,LDA,LDB; +void F77_ctrmm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, + CBLAS_INT *lda, CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; CBLAS_TEST_COMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; @@ -504,10 +504,10 @@ void F77_ctrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, a, *lda, b, *ldb); } -void F77_ctrsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, - int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { - int i,j,LDA,LDB; +void F77_ctrsm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, + CBLAS_INT *lda, CBLAS_TEST_COMPLEX *b, CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; CBLAS_TEST_COMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; diff --git a/CBLAS/testing/c_d2chke.c b/CBLAS/testing/c_d2chke.c index 73e8aceec5..90d9c3ca70 100644 --- a/CBLAS/testing/c_d2chke.c +++ b/CBLAS/testing/c_d2chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -30,7 +30,7 @@ void F77_d2chke(char *rout) { X[2] = {0.0,0.0}, Y[2] = {0.0,0.0}, ALPHA=0.0, BETA=0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_d3chke.c b/CBLAS/testing/c_d3chke.c index 9a1ba3cc59..c9056c85ce 100644 --- a/CBLAS/testing/c_d3chke.c +++ b/CBLAS/testing/c_d3chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -30,7 +30,7 @@ void F77_d3chke(char *rout) { B[2] = {0.0,0.0}, C[2] = {0.0,0.0}, ALPHA=0.0, BETA=0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_dblas1.c b/CBLAS/testing/c_dblas1.c index deb7851257..cf03549fa8 100644 --- a/CBLAS/testing/c_dblas1.c +++ b/CBLAS/testing/c_dblas1.c @@ -8,32 +8,32 @@ */ #include "cblas_test.h" #include "cblas.h" -double F77_dasum(const int *N, double *X, const int *incX) +double F77_dasum(const CBLAS_INT *N, double *X, const CBLAS_INT *incX) { return cblas_dasum(*N, X, *incX); } -void F77_daxpy(const int *N, const double *alpha, const double *X, - const int *incX, double *Y, const int *incY) +void F77_daxpy(const CBLAS_INT *N, const double *alpha, const double *X, + const CBLAS_INT *incX, double *Y, const CBLAS_INT *incY) { cblas_daxpy(*N, *alpha, X, *incX, Y, *incY); return; } -void F77_dcopy(const int *N, double *X, const int *incX, - double *Y, const int *incY) +void F77_dcopy(const CBLAS_INT *N, double *X, const CBLAS_INT *incX, + double *Y, const CBLAS_INT *incY) { cblas_dcopy(*N, X, *incX, Y, *incY); return; } -double F77_ddot(const int *N, const double *X, const int *incX, - const double *Y, const int *incY) +double F77_ddot(const CBLAS_INT *N, const double *X, const CBLAS_INT *incX, + const double *Y, const CBLAS_INT *incY) { return cblas_ddot(*N, X, *incX, Y, *incY); } -double F77_dnrm2(const int *N, const double *X, const int *incX) +double F77_dnrm2(const CBLAS_INT *N, const double *X, const CBLAS_INT *incX) { return cblas_dnrm2(*N, X, *incX); } @@ -44,39 +44,39 @@ void F77_drotg( double *a, double *b, double *c, double *s) return; } -void F77_drot( const int *N, double *X, const int *incX, double *Y, - const int *incY, const double *c, const double *s) +void F77_drot( const CBLAS_INT *N, double *X, const CBLAS_INT *incX, double *Y, + const CBLAS_INT *incY, const double *c, const double *s) { cblas_drot(*N,X,*incX,Y,*incY,*c,*s); return; } -void F77_dscal(const int *N, const double *alpha, double *X, - const int *incX) +void F77_dscal(const CBLAS_INT *N, const double *alpha, double *X, + const CBLAS_INT *incX) { cblas_dscal(*N, *alpha, X, *incX); return; } -void F77_dswap( const int *N, double *X, const int *incX, - double *Y, const int *incY) +void F77_dswap( const CBLAS_INT *N, double *X, const CBLAS_INT *incX, + double *Y, const CBLAS_INT *incY) { cblas_dswap(*N,X,*incX,Y,*incY); return; } -double F77_dzasum(const int *N, void *X, const int *incX) +double F77_dzasum(const CBLAS_INT *N, void *X, const CBLAS_INT *incX) { return cblas_dzasum(*N, X, *incX); } -double F77_dznrm2(const int *N, const void *X, const int *incX) +double F77_dznrm2(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { return cblas_dznrm2(*N, X, *incX); } -int F77_idamax(const int *N, const double *X, const int *incX) +CBLAS_INT F77_idamax(const CBLAS_INT *N, const double *X, const CBLAS_INT *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_idamax(*N, X, *incX)+1); diff --git a/CBLAS/testing/c_dblas2.c b/CBLAS/testing/c_dblas2.c index 835ba19f34..7a3e278e10 100644 --- a/CBLAS/testing/c_dblas2.c +++ b/CBLAS/testing/c_dblas2.c @@ -8,12 +8,12 @@ #include "cblas.h" #include "cblas_test.h" -void F77_dgemv(int *layout, char *transp, int *m, int *n, double *alpha, - double *a, int *lda, double *x, int *incx, double *beta, - double *y, int *incy ) { +void F77_dgemv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, double *alpha, + double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx, double *beta, + double *y, CBLAS_INT *incy ) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -35,11 +35,11 @@ void F77_dgemv(int *layout, char *transp, int *m, int *n, double *alpha, *m, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy ); } -void F77_dger(int *layout, int *m, int *n, double *alpha, double *x, int *incx, - double *y, int *incy, double *a, int *lda ) { +void F77_dger(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, double *alpha, double *x, CBLAS_INT *incx, + double *y, CBLAS_INT *incy, double *a, CBLAS_INT *lda ) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -60,10 +60,10 @@ void F77_dger(int *layout, int *m, int *n, double *alpha, double *x, int *incx, cblas_dger( CblasColMajor, *m, *n, *alpha, x, *incx, y, *incy, a, *lda ); } -void F77_dtrmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, double *a, int *lda, double *x, int *incx) { +void F77_dtrmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -88,10 +88,10 @@ void F77_dtrmv(int *layout, char *uplow, char *transp, char *diagn, } } -void F77_dtrsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, double *a, int *lda, double *x, int *incx ) { +void F77_dtrsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx ) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -112,11 +112,11 @@ void F77_dtrsv(int *layout, char *uplow, char *transp, char *diagn, else cblas_dtrsv(CblasColMajor, uplo, trans, diag, *n, a, *lda, x, *incx ); } -void F77_dsymv(int *layout, char *uplow, int *n, double *alpha, double *a, - int *lda, double *x, int *incx, double *beta, double *y, - int *incy) { +void F77_dsymv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *a, + CBLAS_INT *lda, double *x, CBLAS_INT *incx, double *beta, double *y, + CBLAS_INT *incy) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -136,10 +136,10 @@ void F77_dsymv(int *layout, char *uplow, int *n, double *alpha, double *a, *beta, y, *incy ); } -void F77_dsyr(int *layout, char *uplow, int *n, double *alpha, double *x, - int *incx, double *a, int *lda) { +void F77_dsyr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *x, + CBLAS_INT *incx, double *a, CBLAS_INT *lda) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -160,10 +160,10 @@ void F77_dsyr(int *layout, char *uplow, int *n, double *alpha, double *x, cblas_dsyr(CblasColMajor, uplo, *n, *alpha, x, *incx, a, *lda); } -void F77_dsyr2(int *layout, char *uplow, int *n, double *alpha, double *x, - int *incx, double *y, int *incy, double *a, int *lda) { +void F77_dsyr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *x, + CBLAS_INT *incx, double *y, CBLAS_INT *incy, double *a, CBLAS_INT *lda) { double *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -184,12 +184,12 @@ void F77_dsyr2(int *layout, char *uplow, int *n, double *alpha, double *x, cblas_dsyr2(CblasColMajor, uplo, *n, *alpha, x, *incx, y, *incy, a, *lda); } -void F77_dgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, - double *alpha, double *a, int *lda, double *x, int *incx, - double *beta, double *y, int *incy ) { +void F77_dgbmv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, CBLAS_INT *kl, CBLAS_INT *ku, + double *alpha, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx, + double *beta, double *y, CBLAS_INT *incy ) { double *A; - int i,irow,j,jcol,LDA; + CBLAS_INT i,irow,j,jcol,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -222,10 +222,10 @@ void F77_dgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, a, *lda, x, *incx, *beta, y, *incy ); } -void F77_dtbmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, double *a, int *lda, double *x, int *incx) { +void F77_dtbmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx) { double *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -268,10 +268,10 @@ void F77_dtbmv(int *layout, char *uplow, char *transp, char *diagn, cblas_dtbmv(CblasColMajor, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_dtbsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, double *a, int *lda, double *x, int *incx) { +void F77_dtbsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx) { double *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -314,11 +314,11 @@ void F77_dtbsv(int *layout, char *uplow, char *transp, char *diagn, cblas_dtbsv(CblasColMajor, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_dsbmv(int *layout, char *uplow, int *n, int *k, double *alpha, - double *a, int *lda, double *x, int *incx, double *beta, - double *y, int *incy) { +void F77_dsbmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_INT *k, double *alpha, + double *a, CBLAS_INT *lda, double *x, CBLAS_INT *incx, double *beta, + double *y, CBLAS_INT *incy) { double *A; - int i,j,irow,jcol,LDA; + CBLAS_INT i,j,irow,jcol,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -359,10 +359,10 @@ void F77_dsbmv(int *layout, char *uplow, int *n, int *k, double *alpha, *beta, y, *incy ); } -void F77_dspmv(int *layout, char *uplow, int *n, double *alpha, double *ap, - double *x, int *incx, double *beta, double *y, int *incy) { +void F77_dspmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *ap, + double *x, CBLAS_INT *incx, double *beta, double *y, CBLAS_INT *incy) { double *A,*AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -397,10 +397,10 @@ void F77_dspmv(int *layout, char *uplow, int *n, double *alpha, double *ap, *incy ); } -void F77_dtpmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, double *ap, double *x, int *incx) { +void F77_dtpmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, double *ap, double *x, CBLAS_INT *incx) { double *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -437,10 +437,10 @@ void F77_dtpmv(int *layout, char *uplow, char *transp, char *diagn, cblas_dtpmv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_dtpsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, double *ap, double *x, int *incx) { +void F77_dtpsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, double *ap, double *x, CBLAS_INT *incx) { double *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -478,10 +478,10 @@ void F77_dtpsv(int *layout, char *uplow, char *transp, char *diagn, cblas_dtpsv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_dspr(int *layout, char *uplow, int *n, double *alpha, double *x, - int *incx, double *ap ){ +void F77_dspr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *x, + CBLAS_INT *incx, double *ap ){ double *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -530,10 +530,10 @@ void F77_dspr(int *layout, char *uplow, int *n, double *alpha, double *x, cblas_dspr( CblasColMajor, uplo, *n, *alpha, x, *incx, ap ); } -void F77_dspr2(int *layout, char *uplow, int *n, double *alpha, double *x, - int *incx, double *y, int *incy, double *ap ){ +void F77_dspr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, double *x, + CBLAS_INT *incx, double *y, CBLAS_INT *incy, double *ap ){ double *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); diff --git a/CBLAS/testing/c_dblas3.c b/CBLAS/testing/c_dblas3.c index 8a2c37688d..49c5a698fd 100644 --- a/CBLAS/testing/c_dblas3.c +++ b/CBLAS/testing/c_dblas3.c @@ -11,12 +11,12 @@ #define TEST_ROW_MJR 1 #define UNDEFINED -1 -void F77_dgemm(int *layout, char *transpa, char *transpb, int *m, int *n, - int *k, double *alpha, double *a, int *lda, double *b, int *ldb, - double *beta, double *c, int *ldc ) { +void F77_dgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_INT *k, double *alpha, double *a, CBLAS_INT *lda, double *b, CBLAS_INT *ldb, + double *beta, double *c, CBLAS_INT *ldc ) { double *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_TRANSPOSE transa, transb; get_transpose_type(transpa, &transa); @@ -73,12 +73,12 @@ void F77_dgemm(int *layout, char *transpa, char *transpb, int *m, int *n, cblas_dgemm( UNDEFINED, transa, transb, *m, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_dsymm(int *layout, char *rtlf, char *uplow, int *m, int *n, - double *alpha, double *a, int *lda, double *b, int *ldb, - double *beta, double *c, int *ldc ) { +void F77_dsymm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + double *alpha, double *a, CBLAS_INT *lda, double *b, CBLAS_INT *ldb, + double *beta, double *c, CBLAS_INT *ldc ) { double *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -127,11 +127,11 @@ void F77_dsymm(int *layout, char *rtlf, char *uplow, int *m, int *n, *beta, c, *ldc ); } -void F77_dsyrk(int *layout, char *uplow, char *transp, int *n, int *k, - double *alpha, double *a, int *lda, - double *beta, double *c, int *ldc ) { +void F77_dsyrk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + double *alpha, double *a, CBLAS_INT *lda, + double *beta, double *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; double *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -175,10 +175,10 @@ void F77_dsyrk(int *layout, char *uplow, char *transp, int *n, int *k, c, *ldc ); } -void F77_dsyr2k(int *layout, char *uplow, char *transp, int *n, int *k, - double *alpha, double *a, int *lda, double *b, int *ldb, - double *beta, double *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_dsyr2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + double *alpha, double *a, CBLAS_INT *lda, double *b, CBLAS_INT *ldb, + double *beta, double *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; double *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -230,10 +230,10 @@ void F77_dsyr2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_dsyr2k(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_dtrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, double *alpha, double *a, int *lda, double *b, - int *ldb) { - int i,j,LDA,LDB; +void F77_dtrmm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, double *alpha, double *a, CBLAS_INT *lda, double *b, + CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; double *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; @@ -281,10 +281,10 @@ void F77_dtrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, a, *lda, b, *ldb); } -void F77_dtrsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, double *alpha, double *a, int *lda, double *b, - int *ldb) { - int i,j,LDA,LDB; +void F77_dtrsm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, double *alpha, double *a, CBLAS_INT *lda, double *b, + CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; double *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; diff --git a/CBLAS/testing/c_s2chke.c b/CBLAS/testing/c_s2chke.c index a781bd505f..adb09a6049 100644 --- a/CBLAS/testing/c_s2chke.c +++ b/CBLAS/testing/c_s2chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -30,7 +30,7 @@ void F77_s2chke(char *rout) { X[2] = {0.0,0.0}, Y[2] = {0.0,0.0}, ALPHA=0.0, BETA=0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_s3chke.c b/CBLAS/testing/c_s3chke.c index 425d6a7023..f95277e9c8 100644 --- a/CBLAS/testing/c_s3chke.c +++ b/CBLAS/testing/c_s3chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -30,7 +30,7 @@ void F77_s3chke(char *rout) { B[2] = {0.0,0.0}, C[2] = {0.0,0.0}, ALPHA=0.0, BETA=0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_sblas1.c b/CBLAS/testing/c_sblas1.c index 2e63d98148..e5a88766ff 100644 --- a/CBLAS/testing/c_sblas1.c +++ b/CBLAS/testing/c_sblas1.c @@ -8,42 +8,42 @@ */ #include "cblas_test.h" #include "cblas.h" -float F77_sasum(const int *N, float *X, const int *incX) +float F77_sasum(const CBLAS_INT *N, float *X, const CBLAS_INT *incX) { return cblas_sasum(*N, X, *incX); } -void F77_saxpy(const int *N, const float *alpha, const float *X, - const int *incX, float *Y, const int *incY) +void F77_saxpy(const CBLAS_INT *N, const float *alpha, const float *X, + const CBLAS_INT *incX, float *Y, const CBLAS_INT *incY) { cblas_saxpy(*N, *alpha, X, *incX, Y, *incY); return; } -float F77_scasum(const int *N, void *X, const int *incX) +float F77_scasum(const CBLAS_INT *N, void *X, const CBLAS_INT *incX) { return cblas_scasum(*N, X, *incX); } -float F77_scnrm2(const int *N, const void *X, const int *incX) +float F77_scnrm2(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { return cblas_scnrm2(*N, X, *incX); } -void F77_scopy(const int *N, const float *X, const int *incX, - float *Y, const int *incY) +void F77_scopy(const CBLAS_INT *N, const float *X, const CBLAS_INT *incX, + float *Y, const CBLAS_INT *incY) { cblas_scopy(*N, X, *incX, Y, *incY); return; } -float F77_sdot(const int *N, const float *X, const int *incX, - const float *Y, const int *incY) +float F77_sdot(const CBLAS_INT *N, const float *X, const CBLAS_INT *incX, + const float *Y, const CBLAS_INT *incY) { return cblas_sdot(*N, X, *incX, Y, *incY); } -float F77_snrm2(const int *N, const float *X, const int *incX) +float F77_snrm2(const CBLAS_INT *N, const float *X, const CBLAS_INT *incX) { return cblas_snrm2(*N, X, *incX); } @@ -54,28 +54,28 @@ void F77_srotg( float *a, float *b, float *c, float *s) return; } -void F77_srot( const int *N, float *X, const int *incX, float *Y, - const int *incY, const float *c, const float *s) +void F77_srot( const CBLAS_INT *N, float *X, const CBLAS_INT *incX, float *Y, + const CBLAS_INT *incY, const float *c, const float *s) { cblas_srot(*N,X,*incX,Y,*incY,*c,*s); return; } -void F77_sscal(const int *N, const float *alpha, float *X, - const int *incX) +void F77_sscal(const CBLAS_INT *N, const float *alpha, float *X, + const CBLAS_INT *incX) { cblas_sscal(*N, *alpha, X, *incX); return; } -void F77_sswap( const int *N, float *X, const int *incX, - float *Y, const int *incY) +void F77_sswap( const CBLAS_INT *N, float *X, const CBLAS_INT *incX, + float *Y, const CBLAS_INT *incY) { cblas_sswap(*N,X,*incX,Y,*incY); return; } -int F77_isamax(const int *N, const float *X, const int *incX) +CBLAS_INT F77_isamax(const CBLAS_INT *N, const float *X, const CBLAS_INT *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_isamax(*N, X, *incX)+1); diff --git a/CBLAS/testing/c_sblas2.c b/CBLAS/testing/c_sblas2.c index f119504872..00bb4ca13e 100644 --- a/CBLAS/testing/c_sblas2.c +++ b/CBLAS/testing/c_sblas2.c @@ -8,12 +8,12 @@ #include "cblas.h" #include "cblas_test.h" -void F77_sgemv(int *layout, char *transp, int *m, int *n, float *alpha, - float *a, int *lda, float *x, int *incx, float *beta, - float *y, int *incy ) { +void F77_sgemv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, float *alpha, + float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx, float *beta, + float *y, CBLAS_INT *incy ) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -35,11 +35,11 @@ void F77_sgemv(int *layout, char *transp, int *m, int *n, float *alpha, *m, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy ); } -void F77_sger(int *layout, int *m, int *n, float *alpha, float *x, int *incx, - float *y, int *incy, float *a, int *lda ) { +void F77_sger(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, float *alpha, float *x, CBLAS_INT *incx, + float *y, CBLAS_INT *incy, float *a, CBLAS_INT *lda ) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -60,10 +60,10 @@ void F77_sger(int *layout, int *m, int *n, float *alpha, float *x, int *incx, cblas_sger( CblasColMajor, *m, *n, *alpha, x, *incx, y, *incy, a, *lda ); } -void F77_strmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, float *a, int *lda, float *x, int *incx) { +void F77_strmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -88,10 +88,10 @@ void F77_strmv(int *layout, char *uplow, char *transp, char *diagn, } } -void F77_strsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, float *a, int *lda, float *x, int *incx ) { +void F77_strsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx ) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -112,11 +112,11 @@ void F77_strsv(int *layout, char *uplow, char *transp, char *diagn, else cblas_strsv(CblasColMajor, uplo, trans, diag, *n, a, *lda, x, *incx ); } -void F77_ssymv(int *layout, char *uplow, int *n, float *alpha, float *a, - int *lda, float *x, int *incx, float *beta, float *y, - int *incy) { +void F77_ssymv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *a, + CBLAS_INT *lda, float *x, CBLAS_INT *incx, float *beta, float *y, + CBLAS_INT *incy) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -136,10 +136,10 @@ void F77_ssymv(int *layout, char *uplow, int *n, float *alpha, float *a, *beta, y, *incy ); } -void F77_ssyr(int *layout, char *uplow, int *n, float *alpha, float *x, - int *incx, float *a, int *lda) { +void F77_ssyr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *x, + CBLAS_INT *incx, float *a, CBLAS_INT *lda) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -160,10 +160,10 @@ void F77_ssyr(int *layout, char *uplow, int *n, float *alpha, float *x, cblas_ssyr(CblasColMajor, uplo, *n, *alpha, x, *incx, a, *lda); } -void F77_ssyr2(int *layout, char *uplow, int *n, float *alpha, float *x, - int *incx, float *y, int *incy, float *a, int *lda) { +void F77_ssyr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *x, + CBLAS_INT *incx, float *y, CBLAS_INT *incy, float *a, CBLAS_INT *lda) { float *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -184,12 +184,12 @@ void F77_ssyr2(int *layout, char *uplow, int *n, float *alpha, float *x, cblas_ssyr2(CblasColMajor, uplo, *n, *alpha, x, *incx, y, *incy, a, *lda); } -void F77_sgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, - float *alpha, float *a, int *lda, float *x, int *incx, - float *beta, float *y, int *incy ) { +void F77_sgbmv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, CBLAS_INT *kl, CBLAS_INT *ku, + float *alpha, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx, + float *beta, float *y, CBLAS_INT *incy ) { float *A; - int i,irow,j,jcol,LDA; + CBLAS_INT i,irow,j,jcol,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -222,10 +222,10 @@ void F77_sgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, a, *lda, x, *incx, *beta, y, *incy ); } -void F77_stbmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, float *a, int *lda, float *x, int *incx) { +void F77_stbmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx) { float *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -268,10 +268,10 @@ void F77_stbmv(int *layout, char *uplow, char *transp, char *diagn, cblas_stbmv(CblasColMajor, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_stbsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, float *a, int *lda, float *x, int *incx) { +void F77_stbsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx) { float *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -314,11 +314,11 @@ void F77_stbsv(int *layout, char *uplow, char *transp, char *diagn, cblas_stbsv(CblasColMajor, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ssbmv(int *layout, char *uplow, int *n, int *k, float *alpha, - float *a, int *lda, float *x, int *incx, float *beta, - float *y, int *incy) { +void F77_ssbmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_INT *k, float *alpha, + float *a, CBLAS_INT *lda, float *x, CBLAS_INT *incx, float *beta, + float *y, CBLAS_INT *incy) { float *A; - int i,j,irow,jcol,LDA; + CBLAS_INT i,j,irow,jcol,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -359,10 +359,10 @@ void F77_ssbmv(int *layout, char *uplow, int *n, int *k, float *alpha, *beta, y, *incy ); } -void F77_sspmv(int *layout, char *uplow, int *n, float *alpha, float *ap, - float *x, int *incx, float *beta, float *y, int *incy) { +void F77_sspmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *ap, + float *x, CBLAS_INT *incx, float *beta, float *y, CBLAS_INT *incy) { float *A,*AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -396,10 +396,10 @@ void F77_sspmv(int *layout, char *uplow, int *n, float *alpha, float *ap, *incy ); } -void F77_stpmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, float *ap, float *x, int *incx) { +void F77_stpmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, float *ap, float *x, CBLAS_INT *incx) { float *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -435,10 +435,10 @@ void F77_stpmv(int *layout, char *uplow, char *transp, char *diagn, cblas_stpmv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_stpsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, float *ap, float *x, int *incx) { +void F77_stpsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, float *ap, float *x, CBLAS_INT *incx) { float *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -475,10 +475,10 @@ void F77_stpsv(int *layout, char *uplow, char *transp, char *diagn, cblas_stpsv( CblasColMajor, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_sspr(int *layout, char *uplow, int *n, float *alpha, float *x, - int *incx, float *ap ){ +void F77_sspr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *x, + CBLAS_INT *incx, float *ap ){ float *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -526,10 +526,10 @@ void F77_sspr(int *layout, char *uplow, int *n, float *alpha, float *x, cblas_sspr( CblasColMajor, uplo, *n, *alpha, x, *incx, ap ); } -void F77_sspr2(int *layout, char *uplow, int *n, float *alpha, float *x, - int *incx, float *y, int *incy, float *ap ){ +void F77_sspr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, float *alpha, float *x, + CBLAS_INT *incx, float *y, CBLAS_INT *incy, float *ap ){ float *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); diff --git a/CBLAS/testing/c_sblas3.c b/CBLAS/testing/c_sblas3.c index 024fc474fd..0621b293d5 100644 --- a/CBLAS/testing/c_sblas3.c +++ b/CBLAS/testing/c_sblas3.c @@ -9,12 +9,12 @@ #include "cblas.h" #include "cblas_test.h" -void F77_sgemm(int *layout, char *transpa, char *transpb, int *m, int *n, - int *k, float *alpha, float *a, int *lda, float *b, int *ldb, - float *beta, float *c, int *ldc ) { +void F77_sgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_INT *k, float *alpha, float *a, CBLAS_INT *lda, float *b, CBLAS_INT *ldb, + float *beta, float *c, CBLAS_INT *ldc ) { float *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_TRANSPOSE transa, transb; get_transpose_type(transpa, &transa); @@ -70,12 +70,12 @@ void F77_sgemm(int *layout, char *transpa, char *transpb, int *m, int *n, cblas_sgemm( UNDEFINED, transa, transb, *m, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_ssymm(int *layout, char *rtlf, char *uplow, int *m, int *n, - float *alpha, float *a, int *lda, float *b, int *ldb, - float *beta, float *c, int *ldc ) { +void F77_ssymm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + float *alpha, float *a, CBLAS_INT *lda, float *b, CBLAS_INT *ldb, + float *beta, float *c, CBLAS_INT *ldc ) { float *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -124,11 +124,11 @@ void F77_ssymm(int *layout, char *rtlf, char *uplow, int *m, int *n, *beta, c, *ldc ); } -void F77_ssyrk(int *layout, char *uplow, char *transp, int *n, int *k, - float *alpha, float *a, int *lda, - float *beta, float *c, int *ldc ) { +void F77_ssyrk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + float *alpha, float *a, CBLAS_INT *lda, + float *beta, float *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; float *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -172,10 +172,10 @@ void F77_ssyrk(int *layout, char *uplow, char *transp, int *n, int *k, c, *ldc ); } -void F77_ssyr2k(int *layout, char *uplow, char *transp, int *n, int *k, - float *alpha, float *a, int *lda, float *b, int *ldb, - float *beta, float *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_ssyr2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + float *alpha, float *a, CBLAS_INT *lda, float *b, CBLAS_INT *ldb, + float *beta, float *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; float *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -227,10 +227,10 @@ void F77_ssyr2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_ssyr2k(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_strmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, float *alpha, float *a, int *lda, float *b, - int *ldb) { - int i,j,LDA,LDB; +void F77_strmm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, float *alpha, float *a, CBLAS_INT *lda, float *b, + CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; float *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; @@ -278,10 +278,10 @@ void F77_strmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, a, *lda, b, *ldb); } -void F77_strsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, float *alpha, float *a, int *lda, float *b, - int *ldb) { - int i,j,LDA,LDB; +void F77_strsm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, float *alpha, float *a, CBLAS_INT *lda, float *b, + CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; float *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; diff --git a/CBLAS/testing/c_xerbla.c b/CBLAS/testing/c_xerbla.c index c3f14c364a..57d61ee1fd 100644 --- a/CBLAS/testing/c_xerbla.c +++ b/CBLAS/testing/c_xerbla.c @@ -7,8 +7,8 @@ void cblas_xerbla(CBLAS_INT info, const char *rout, const char *form, ...) { - extern int cblas_lerr, cblas_info, cblas_ok; - extern int link_xerbla; + extern CBLAS_INT cblas_lerr, cblas_info, cblas_ok; + extern CBLAS_INT link_xerbla; extern int RowMajorStrg; extern char *cblas_rout; @@ -101,9 +101,9 @@ void F77_xerbla(char *srname, void *vinfo) F77_Integer i; extern F77_Integer link_xerbla; #else - int *info=vinfo; - int i; - extern int link_xerbla; + CBLAS_INT *info=vinfo; + CBLAS_INT i; + extern CBLAS_INT link_xerbla; #endif #ifdef F77_Char srname = F2C_STR(F77_srname, XerblaStrLen); diff --git a/CBLAS/testing/c_z2chke.c b/CBLAS/testing/c_z2chke.c index 65e552da3e..23f6896761 100644 --- a/CBLAS/testing/c_z2chke.c +++ b/CBLAS/testing/c_z2chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -32,7 +32,7 @@ void F77_z2chke(char *rout) { ALPHA[2] = {0.0,0.0}, BETA[2] = {0.0,0.0}, RALPHA = 0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_z3chke.c b/CBLAS/testing/c_z3chke.c index 30840489af..d114e8f995 100644 --- a/CBLAS/testing/c_z3chke.c +++ b/CBLAS/testing/c_z3chke.c @@ -3,8 +3,8 @@ #include "cblas.h" #include "cblas_test.h" -int cblas_ok, cblas_lerr, cblas_info; -int link_xerbla=TRUE; +CBLAS_INT cblas_ok, cblas_lerr, cblas_info; +CBLAS_INT link_xerbla=TRUE; char *cblas_rout; #ifdef F77_Char @@ -14,8 +14,8 @@ void F77_xerbla(char *srname, void *vinfo); #endif void chkxer(void) { - extern int cblas_ok, cblas_lerr, cblas_info; - extern int link_xerbla; + extern CBLAS_INT cblas_ok, cblas_lerr, cblas_info; + extern CBLAS_INT link_xerbla; extern char *cblas_rout; if (cblas_lerr == 1 ) { printf("***** ILLEGAL VALUE OF PARAMETER NUMBER %d NOT DETECTED BY %s *****\n", cblas_info, cblas_rout); @@ -32,7 +32,7 @@ void F77_z3chke(char * rout) { ALPHA[2] = {0.0,0.0}, BETA[2] = {0.0,0.0}, RALPHA = 0.0, RBETA = 0.0; - extern int cblas_info, cblas_lerr, cblas_ok; + extern CBLAS_INT cblas_info, cblas_lerr, cblas_ok; extern int RowMajorStrg; extern char *cblas_rout; diff --git a/CBLAS/testing/c_zblas1.c b/CBLAS/testing/c_zblas1.c index 2b21d8f187..698397db4b 100644 --- a/CBLAS/testing/c_zblas1.c +++ b/CBLAS/testing/c_zblas1.c @@ -8,67 +8,67 @@ */ #include "cblas_test.h" #include "cblas.h" -void F77_zaxpy(const int *N, const void *alpha, void *X, - const int *incX, void *Y, const int *incY) +void F77_zaxpy(const CBLAS_INT *N, const void *alpha, void *X, + const CBLAS_INT *incX, void *Y, const CBLAS_INT *incY) { cblas_zaxpy(*N, alpha, X, *incX, Y, *incY); return; } -void F77_zcopy(const int *N, void *X, const int *incX, - void *Y, const int *incY) +void F77_zcopy(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY) { cblas_zcopy(*N, X, *incX, Y, *incY); return; } -void F77_zdotc(const int *N, const void *X, const int *incX, - const void *Y, const int *incY,void *dotc) +void F77_zdotc(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX, + const void *Y, const CBLAS_INT *incY,void *dotc) { cblas_zdotc_sub(*N, X, *incX, Y, *incY, dotc); return; } -void F77_zdotu(const int *N, void *X, const int *incX, - void *Y, const int *incY,void *dotu) +void F77_zdotu(const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY,void *dotu) { cblas_zdotu_sub(*N, X, *incX, Y, *incY, dotu); return; } -void F77_zdscal(const int *N, const double *alpha, void *X, - const int *incX) +void F77_zdscal(const CBLAS_INT *N, const double *alpha, void *X, + const CBLAS_INT *incX) { cblas_zdscal(*N, *alpha, X, *incX); return; } -void F77_zscal(const int *N, const void * *alpha, void *X, - const int *incX) +void F77_zscal(const CBLAS_INT *N, const void * *alpha, void *X, + const CBLAS_INT *incX) { cblas_zscal(*N, alpha, X, *incX); return; } -void F77_zswap( const int *N, void *X, const int *incX, - void *Y, const int *incY) +void F77_zswap( const CBLAS_INT *N, void *X, const CBLAS_INT *incX, + void *Y, const CBLAS_INT *incY) { cblas_zswap(*N,X,*incX,Y,*incY); return; } -int F77_izamax(const int *N, const void *X, const int *incX) +CBLAS_INT F77_izamax(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { if (*N < 1 || *incX < 1) return(0); return(cblas_izamax(*N, X, *incX)+1); } -double F77_dznrm2(const int *N, const void *X, const int *incX) +double F77_dznrm2(const CBLAS_INT *N, const void *X, const CBLAS_INT *incX) { return cblas_dznrm2(*N, X, *incX); } -double F77_dzasum(const int *N, void *X, const int *incX) +double F77_dzasum(const CBLAS_INT *N, void *X, const CBLAS_INT *incX) { return cblas_dzasum(*N, X, *incX); } diff --git a/CBLAS/testing/c_zblas2.c b/CBLAS/testing/c_zblas2.c index b6fbdd628d..b70f9ce5e0 100644 --- a/CBLAS/testing/c_zblas2.c +++ b/CBLAS/testing/c_zblas2.c @@ -8,13 +8,13 @@ #include "cblas.h" #include "cblas_test.h" -void F77_zgemv(int *layout, char *transp, int *m, int *n, +void F77_zgemv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, const void *alpha, - CBLAS_TEST_ZOMPLEX *a, int *lda, const void *x, int *incx, - const void *beta, void *y, int *incy) { + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, const void *x, CBLAS_INT *incx, + const void *beta, void *y, CBLAS_INT *incy) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -38,13 +38,13 @@ void F77_zgemv(int *layout, char *transp, int *m, int *n, *m, *n, alpha, a, *lda, x, *incx, beta, y, *incy ); } -void F77_zgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *x, int *incx, - CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, int *incy) { +void F77_zgbmv(CBLAS_INT *layout, char *transp, CBLAS_INT *m, CBLAS_INT *n, CBLAS_INT *kl, CBLAS_INT *ku, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, + CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy) { CBLAS_TEST_ZOMPLEX *A; - int i,j,irow,jcol,LDA; + CBLAS_INT i,j,irow,jcol,LDA; CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); @@ -85,12 +85,12 @@ void F77_zgbmv(int *layout, char *transp, int *m, int *n, int *kl, int *ku, *incx, beta, y, *incy ); } -void F77_zgeru(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy, - CBLAS_TEST_ZOMPLEX *a, int *lda){ +void F77_zgeru(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda){ CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -114,11 +114,11 @@ void F77_zgeru(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, cblas_zgeru( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda ); } -void F77_zgerc(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy, - CBLAS_TEST_ZOMPLEX *a, int *lda) { +void F77_zgerc(CBLAS_INT *layout, CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; if (*layout == TEST_ROW_MJR) { LDA = *n+1; @@ -142,12 +142,12 @@ void F77_zgerc(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, cblas_zgerc( UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda ); } -void F77_zhemv(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx, CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, int *incy){ +void F77_zhemv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -172,13 +172,13 @@ void F77_zhemv(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, beta, y, *incy ); } -void F77_zhbmv(int *layout, char *uplow, int *n, int *k, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *y, int *incy){ +void F77_zhbmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_ZOMPLEX *A; -int i,irow,j,jcol,LDA; +CBLAS_INT i,irow,j,jcol,LDA; CBLAS_UPLO uplo; @@ -236,12 +236,12 @@ int i,irow,j,jcol,LDA; beta, y, *incy ); } -void F77_zhpmv(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, int *incx, - CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, int *incy){ +void F77_zhpmv(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, + CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy){ CBLAS_TEST_ZOMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -292,11 +292,11 @@ void F77_zhpmv(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, *incy ); } -void F77_ztbmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx) { +void F77_ztbmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -355,12 +355,12 @@ void F77_ztbmv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztbmv(UNDEFINED, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ztbsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, int *k, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx) { +void F77_ztbsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_INT *k, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A; - int irow, jcol, i, j, LDA; + CBLAS_INT irow, jcol, i, j, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -419,10 +419,10 @@ void F77_ztbsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztbsv(UNDEFINED, uplo, trans, diag, *n, *k, a, *lda, x, *incx); } -void F77_ztpmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, int *incx) { +void F77_ztpmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -474,10 +474,10 @@ void F77_ztpmv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztpmv( UNDEFINED, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_ztpsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, int *incx) { +void F77_ztpsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *ap, CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A, *AP; - int i, j, k, LDA; + CBLAS_INT i, j, k, LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -529,11 +529,11 @@ void F77_ztpsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztpsv( UNDEFINED, uplo, trans, diag, *n, ap, x, *incx ); } -void F77_ztrmv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx) { +void F77_ztrmv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -558,11 +558,11 @@ void F77_ztrmv(int *layout, char *uplow, char *transp, char *diagn, else cblas_ztrmv(UNDEFINED, uplo, trans, diag, *n, a, *lda, x, *incx); } -void F77_ztrsv(int *layout, char *uplow, char *transp, char *diagn, - int *n, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *x, - int *incx) { +void F77_ztrsv(CBLAS_INT *layout, char *uplow, char *transp, char *diagn, + CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *x, + CBLAS_INT *incx) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_TRANSPOSE trans; CBLAS_UPLO uplo; CBLAS_DIAG diag; @@ -588,10 +588,10 @@ void F77_ztrsv(int *layout, char *uplow, char *transp, char *diagn, cblas_ztrsv(UNDEFINED, uplo, trans, diag, *n, a, *lda, x, *incx ); } -void F77_zhpr(int *layout, char *uplow, int *n, double *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *ap) { +void F77_zhpr(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *ap) { CBLAS_TEST_ZOMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -663,11 +663,11 @@ void F77_zhpr(int *layout, char *uplow, int *n, double *alpha, cblas_zhpr(UNDEFINED, uplo, *n, *alpha, x, *incx, ap ); } -void F77_zhpr2(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy, +void F77_zhpr2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy, CBLAS_TEST_ZOMPLEX *ap) { CBLAS_TEST_ZOMPLEX *A, *AP; - int i,j,k,LDA; + CBLAS_INT i,j,k,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -740,10 +740,10 @@ void F77_zhpr2(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, cblas_zhpr2( UNDEFINED, uplo, *n, alpha, x, *incx, y, *incy, ap ); } -void F77_zher(int *layout, char *uplow, int *n, double *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *a, int *lda) { +void F77_zher(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, double *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); @@ -772,12 +772,12 @@ void F77_zher(int *layout, char *uplow, int *n, double *alpha, cblas_zher( UNDEFINED, uplo, *n, *alpha, x, *incx, a, *lda ); } -void F77_zher2(int *layout, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha, - CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy, - CBLAS_TEST_ZOMPLEX *a, int *lda) { +void F77_zher2(CBLAS_INT *layout, char *uplow, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, + CBLAS_TEST_ZOMPLEX *x, CBLAS_INT *incx, CBLAS_TEST_ZOMPLEX *y, CBLAS_INT *incy, + CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda) { CBLAS_TEST_ZOMPLEX *A; - int i,j,LDA; + CBLAS_INT i,j,LDA; CBLAS_UPLO uplo; get_uplo_type(uplow,&uplo); diff --git a/CBLAS/testing/c_zblas3.c b/CBLAS/testing/c_zblas3.c index 65a821359c..891c70a83d 100644 --- a/CBLAS/testing/c_zblas3.c +++ b/CBLAS/testing/c_zblas3.c @@ -11,13 +11,13 @@ #define TEST_ROW_MJR 1 #define UNDEFINED -1 -void F77_zgemm(int *layout, char *transpa, char *transpb, int *m, int *n, - int *k, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zgemm(CBLAS_INT *layout, char *transpa, char *transpb, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_INT *k, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_ZOMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_TRANSPOSE transa, transb; get_transpose_type(transpa, &transa); @@ -87,13 +87,13 @@ void F77_zgemm(int *layout, char *transpa, char *transpb, int *m, int *n, cblas_zgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_zhemm(int *layout, char *rtlf, char *uplow, int *m, int *n, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zhemm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_ZOMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -151,13 +151,13 @@ void F77_zhemm(int *layout, char *rtlf, char *uplow, int *m, int *n, cblas_zhemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_zsymm(int *layout, char *rtlf, char *uplow, int *m, int *n, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zsymm(CBLAS_INT *layout, char *rtlf, char *uplow, CBLAS_INT *m, CBLAS_INT *n, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { CBLAS_TEST_ZOMPLEX *A, *B, *C; - int i,j,LDA, LDB, LDC; + CBLAS_INT i,j,LDA, LDB, LDC; CBLAS_UPLO uplo; CBLAS_SIDE side; @@ -206,11 +206,11 @@ void F77_zsymm(int *layout, char *rtlf, char *uplow, int *m, int *n, beta, c, *ldc ); } -void F77_zherk(int *layout, char *uplow, char *transp, int *n, int *k, - double *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - double *beta, CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zherk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + double *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + double *beta, CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; CBLAS_TEST_ZOMPLEX *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -262,11 +262,11 @@ void F77_zherk(int *layout, char *uplow, char *transp, int *n, int *k, c, *ldc ); } -void F77_zsyrk(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *c, int *ldc ) { +void F77_zsyrk(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *beta, CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { - int i,j,LDA,LDC; + CBLAS_INT i,j,LDA,LDC; CBLAS_TEST_ZOMPLEX *A, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -317,11 +317,11 @@ void F77_zsyrk(int *layout, char *uplow, char *transp, int *n, int *k, cblas_zsyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta, c, *ldc ); } -void F77_zher2k(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, double *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_zher2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, double *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; CBLAS_TEST_ZOMPLEX *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -381,11 +381,11 @@ void F77_zher2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_zher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, b, *ldb, *beta, c, *ldc ); } -void F77_zsyr2k(int *layout, char *uplow, char *transp, int *n, int *k, - CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, - CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta, - CBLAS_TEST_ZOMPLEX *c, int *ldc ) { - int i,j,LDA,LDB,LDC; +void F77_zsyr2k(CBLAS_INT *layout, char *uplow, char *transp, CBLAS_INT *n, CBLAS_INT *k, + CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, CBLAS_INT *lda, + CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb, CBLAS_TEST_ZOMPLEX *beta, + CBLAS_TEST_ZOMPLEX *c, CBLAS_INT *ldc ) { + CBLAS_INT i,j,LDA,LDB,LDC; CBLAS_TEST_ZOMPLEX *A, *B, *C; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; @@ -445,10 +445,10 @@ void F77_zsyr2k(int *layout, char *uplow, char *transp, int *n, int *k, cblas_zsyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, b, *ldb, beta, c, *ldc ); } -void F77_ztrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, - int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { - int i,j,LDA,LDB; +void F77_ztrmm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, + CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; CBLAS_TEST_ZOMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; @@ -504,10 +504,10 @@ void F77_ztrmm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, a, *lda, b, *ldb); } -void F77_ztrsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, - int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, - int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { - int i,j,LDA,LDB; +void F77_ztrsm(CBLAS_INT *layout, char *rtlf, char *uplow, char *transp, char *diagn, + CBLAS_INT *m, CBLAS_INT *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, + CBLAS_INT *lda, CBLAS_TEST_ZOMPLEX *b, CBLAS_INT *ldb) { + CBLAS_INT i,j,LDA,LDB; CBLAS_TEST_ZOMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; diff --git a/CMakeLists.txt b/CMakeLists.txt index 41b3c138cf..c0bc6f6892 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 3.2) project(LAPACK Fortran C) set(LAPACK_MAJOR_VERSION 3) -set(LAPACK_MINOR_VERSION 10) -set(LAPACK_PATCH_VERSION 1) +set(LAPACK_MINOR_VERSION 11) +set(LAPACK_PATCH_VERSION 0) set( LAPACK_VERSION ${LAPACK_MAJOR_VERSION}.${LAPACK_MINOR_VERSION}.${LAPACK_PATCH_VERSION} @@ -42,7 +42,7 @@ if(_is_coverage_build) endif() # By default test Fortran compiler complex abs and complex division -option(TEST_FORTRAN_COMPILER "Test Fortran compiler complex abs and complex division" ON) +option(TEST_FORTRAN_COMPILER "Test Fortran compiler complex abs and complex division" OFF) if( TEST_FORTRAN_COMPILER ) add_executable( test_zcomplexabs ${LAPACK_SOURCE_DIR}/INSTALL/test_zcomplexabs.f ) @@ -207,6 +207,22 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL Compaq) endif() endif() +# Add option to enable flat namespace for symbol resolution on macOS +if(APPLE) + option(USE_FLAT_NAMESPACE "Use flat namespaces for symbol resolution during build and runtime." OFF) + + if(USE_FLAT_NAMESPACE) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-flat_namespace") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-flat_namespace") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-flat_namespace") + else() + if(BUILD_SHARED_LIBS AND BUILD_TESTING) + message(WARNING + "LAPACK test suite might fail with shared libraries and the default two-level namespace. " + "Disable shared libraries or enable flat namespace for symbol resolution via -DUSE_FLAT_NAMESPACE=ON.") + endif() + endif() +endif() # -------------------------------------------------- set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKLIB}-targets) diff --git a/DOCS/Doxyfile b/DOCS/Doxyfile index 4229769c98..97415241c4 100644 --- a/DOCS/Doxyfile +++ b/DOCS/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 3.10.1 +PROJECT_NUMBER = 3.11.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/DOCS/Doxyfile_man b/DOCS/Doxyfile_man index 6a64156fba..6625de4566 100644 --- a/DOCS/Doxyfile_man +++ b/DOCS/Doxyfile_man @@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 3.10.1 +PROJECT_NUMBER = 3.11.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/INSTALL/ilaver.f b/INSTALL/ilaver.f index fd179ceb81..7d55e13e9d 100644 --- a/INSTALL/ilaver.f +++ b/INSTALL/ilaver.f @@ -58,8 +58,8 @@ SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH * ===================================================================== VERS_MAJOR = 3 - VERS_MINOR = 10 - VERS_PATCH = 1 + VERS_MINOR = 11 + VERS_PATCH = 0 * ===================================================================== * RETURN diff --git a/LAPACKE/include/lapacke.h b/LAPACKE/include/lapacke.h index 29d5943a32..9998b15047 100644 --- a/LAPACKE/include/lapacke.h +++ b/LAPACKE/include/lapacke.h @@ -10233,6 +10233,13 @@ lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb, double* c, lapack_int ldc, double* scale, lapack_int* iwork, lapack_int liwork, double* swork, lapack_int ldswork ); +lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* scale, float* swork, + lapack_int ldswork ); lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, diff --git a/LAPACKE/include/lapacke_utils.h b/LAPACKE/include/lapacke_utils.h index 95979928d2..332a5024fb 100644 --- a/LAPACKE/include/lapacke_utils.h +++ b/LAPACKE/include/lapacke_utils.h @@ -128,6 +128,10 @@ void LAPACKE_ctp_trans( int matrix_layout, char uplo, char diag, void LAPACKE_ctr_trans( int matrix_layout, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); +void LAPACKE_ctz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float *in, lapack_int ldin, + lapack_complex_float *out, lapack_int ldout ); void LAPACKE_dgb_trans( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, @@ -178,6 +182,10 @@ void LAPACKE_dtp_trans( int matrix_layout, char uplo, char diag, void LAPACKE_dtr_trans( int matrix_layout, char uplo, char diag, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); +void LAPACKE_dtz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const double *in, lapack_int ldin, + double *out, lapack_int ldout ); void LAPACKE_sgb_trans( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, @@ -228,6 +236,10 @@ void LAPACKE_stp_trans( int matrix_layout, char uplo, char diag, void LAPACKE_str_trans( int matrix_layout, char uplo, char diag, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); +void LAPACKE_stz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const float *in, lapack_int ldin, + float *out, lapack_int ldout ); void LAPACKE_zgb_trans( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, @@ -284,6 +296,10 @@ void LAPACKE_ztp_trans( int matrix_layout, char uplo, char diag, void LAPACKE_ztr_trans( int matrix_layout, char uplo, char diag, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); +void LAPACKE_ztz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double *in, lapack_int ldin, + lapack_complex_double *out, lapack_int ldout ); /* NaN checkers */ #define LAPACK_SISNAN( x ) ( x != x ) @@ -376,6 +392,10 @@ lapack_logical LAPACKE_ctr_nancheck( int matrix_layout, char uplo, char diag, lapack_int n, const lapack_complex_float *a, lapack_int lda ); +lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float *a, + lapack_int lda ); lapack_logical LAPACKE_dgb_nancheck( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, @@ -440,6 +460,9 @@ lapack_logical LAPACKE_dtr_nancheck( int matrix_layout, char uplo, char diag, lapack_int n, const double *a, lapack_int lda ); +lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const double *a, lapack_int lda ); lapack_logical LAPACKE_sgb_nancheck( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, @@ -504,6 +527,9 @@ lapack_logical LAPACKE_str_nancheck( int matrix_layout, char uplo, char diag, lapack_int n, const float *a, lapack_int lda ); +lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const float *a, lapack_int lda ); lapack_logical LAPACKE_zgb_nancheck( int matrix_layout, lapack_int m, lapack_int n, lapack_int kl, @@ -574,6 +600,10 @@ lapack_logical LAPACKE_ztr_nancheck( int matrix_layout, char uplo, char diag, lapack_int n, const lapack_complex_double *a, lapack_int lda ); +lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double *a, + lapack_int lda ); #ifdef __cplusplus } diff --git a/LAPACKE/src/Makefile b/LAPACKE/src/Makefile index fdd62eab21..65e0e557f7 100644 --- a/LAPACKE/src/Makefile +++ b/LAPACKE/src/Makefile @@ -356,6 +356,8 @@ lapacke_clacrm.o \ lapacke_clacrm_work.o \ lapacke_clag2z.o \ lapacke_clag2z_work.o \ +lapacke_clangb.o \ +lapacke_clangb_work.o \ lapacke_clange.o \ lapacke_clange_work.o \ lapacke_clanhe.o \ @@ -840,6 +842,8 @@ lapacke_dlag2s.o \ lapacke_dlag2s_work.o \ lapacke_dlamch.o \ lapacke_dlamch_work.o \ +lapacke_dlangb.o \ +lapacke_dlangb_work.o \ lapacke_dlange.o \ lapacke_dlange_work.o \ lapacke_dlansy.o \ @@ -1412,6 +1416,8 @@ lapacke_slag2d.o \ lapacke_slag2d_work.o \ lapacke_slamch.o \ lapacke_slamch_work.o \ +lapacke_slangb.o \ +lapacke_slangb_work.o \ lapacke_slange.o \ lapacke_slange_work.o \ lapacke_slansy.o \ @@ -2114,6 +2120,8 @@ lapacke_zlacrm.o \ lapacke_zlacrm_work.o \ lapacke_zlag2c.o \ lapacke_zlag2c_work.o \ +lapacke_zlangb.o \ +lapacke_zlangb_work.o \ lapacke_zlange.o \ lapacke_zlange_work.o \ lapacke_zlanhe.o \ diff --git a/LAPACKE/src/lapacke_clantr.c b/LAPACKE/src/lapacke_clantr.c index 88e765f2b8..e00b6c5788 100644 --- a/LAPACKE/src/lapacke_clantr.c +++ b/LAPACKE/src/lapacke_clantr.c @@ -33,8 +33,8 @@ #include "lapacke_utils.h" float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag, - lapack_int m, lapack_int n, const lapack_complex_float* a, - lapack_int lda ) + lapack_int m, lapack_int n, const lapack_complex_float* a, + lapack_int lda ) { lapack_int info = 0; float res = 0.; @@ -46,7 +46,7 @@ float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag, #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - if( LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + if( LAPACKE_ctz_nancheck( matrix_layout, 'f', uplo, diag, m, n, a, lda ) ) { return -7; } } diff --git a/LAPACKE/src/lapacke_clarfb.c b/LAPACKE/src/lapacke_clarfb.c index ccd34cecdf..ed12b476eb 100644 --- a/LAPACKE/src/lapacke_clarfb.c +++ b/LAPACKE/src/lapacke_clarfb.c @@ -42,7 +42,9 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct lapack_int info = 0; lapack_int ldwork; lapack_complex_float* work = NULL; - lapack_int ncols_v, nrows_v; + lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_clarfb", -1 ); return -1; @@ -50,59 +52,27 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - lapack_int lrv, lcv; /* row, column stride */ - if( matrix_layout == LAPACK_COL_MAJOR ) { - lrv = 1; - lcv = ldv; - } else { - lrv = ldv; - lcv = 1; - } - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { + LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); + return -8; + } + if( LAPACKE_ctz_nancheck( matrix_layout, direct, uplo, 'u', + nrows_v, ncols_v, v, ldv ) ) { + return -9; } if( LAPACKE_cge_nancheck( matrix_layout, k, k, t, ldt ) ) { return -11; } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ctr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v-k, ncols_v, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); - return -8; - } - if( LAPACKE_ctr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*lrv], ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ctr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v-k, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); - return -8; - } - if( LAPACKE_ctr_nancheck( matrix_layout, 'l', 'u', k, - &v[(ncols_v-k)*lcv], ldv ) ) - return -9; - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } } #endif diff --git a/LAPACKE/src/lapacke_clarfb_work.c b/LAPACKE/src/lapacke_clarfb_work.c index 3ad97c22d0..545769b83c 100644 --- a/LAPACKE/src/lapacke_clarfb_work.c +++ b/LAPACKE/src/lapacke_clarfb_work.c @@ -42,6 +42,8 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, { lapack_int info = 0; lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; lapack_int ldc_t, ldt_t, ldv_t; lapack_complex_float *v_t = NULL, *t_t = NULL, *c_t = NULL; if( matrix_layout == LAPACK_COL_MAJOR ) { @@ -52,16 +54,14 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); + + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + ldc_t = MAX(1,m); ldt_t = MAX(1,k); ldv_t = MAX(1,nrows_v); @@ -81,6 +81,11 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); return info; } + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_clarfb_work", info ); + return info; + } /* Allocate memory for temporary array(s) */ v_t = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * @@ -102,36 +107,8 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, goto exit_level_2; } /* Transpose input matrices */ - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_ctr_trans( matrix_layout, 'l', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_cge_trans( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], ldv, - &v_t[k], ldv_t ); - } else if( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb_work", -8 ); - return -8; - } - LAPACKE_ctr_trans( matrix_layout, 'u', 'u', k, &v[(nrows_v-k)*ldv], - ldv, &v_t[nrows_v-k], ldv_t ); - LAPACKE_cge_trans( matrix_layout, nrows_v-k, ncols_v, v, ldv, v_t, - ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_ctr_trans( matrix_layout, 'u', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_cge_trans( matrix_layout, nrows_v, ncols_v-k, &v[k], ldv, - &v_t[k*ldv_t], ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_clarfb_work", -8 ); - return -8; - } - LAPACKE_ctr_trans( matrix_layout, 'l', 'u', k, &v[ncols_v-k], ldv, - &v_t[(ncols_v-k)*ldv_t], ldv_t ); - LAPACKE_cge_trans( matrix_layout, nrows_v, ncols_v-k, v, ldv, v_t, - ldv_t ); - } + LAPACKE_ctz_trans( matrix_layout, direct, uplo, 'u', nrows_v, ncols_v, + v, ldv, v_t, ldv_t ); LAPACKE_cge_trans( matrix_layout, k, k, t, ldt, t_t, ldt_t ); LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ diff --git a/LAPACKE/src/lapacke_ctgsen.c b/LAPACKE/src/lapacke_ctgsen.c index c543cd44f9..b69c08bb44 100644 --- a/LAPACKE/src/lapacke_ctgsen.c +++ b/LAPACKE/src/lapacke_ctgsen.c @@ -86,12 +86,10 @@ lapack_int LAPACKE_ctgsen( int matrix_layout, lapack_int ijob, liwork = iwork_query; lwork = LAPACK_C2INT( work_query ); /* Allocate memory for work arrays */ - if( ijob != 0 ) { - iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if( iwork == NULL ) { - info = LAPACK_WORK_MEMORY_ERROR; - goto exit_level_0; - } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; } work = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); @@ -106,9 +104,7 @@ lapack_int LAPACKE_ctgsen( int matrix_layout, lapack_int ijob, /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: - if( ijob != 0 ) { - LAPACKE_free( iwork ); - } + LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_ctgsen", info ); diff --git a/LAPACKE/src/lapacke_ctpmqrt_work.c b/LAPACKE/src/lapacke_ctpmqrt_work.c index 5ec948e7bb..e01664bdf8 100644 --- a/LAPACKE/src/lapacke_ctpmqrt_work.c +++ b/LAPACKE/src/lapacke_ctpmqrt_work.c @@ -50,16 +50,24 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,k); + lapack_int nrowsA, ncolsA, nrowsV; + if ( side == LAPACKE_lsame(side, 'l') ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == LAPACKE_lsame(side, 'r') ) { nrowsA = m; ncolsA = k; nrowsV = n; } + else { + info = -2; + LAPACKE_xerbla( "LAPACKE_ctpmqrt_work", info ); + return info; + } + lapack_int lda_t = MAX(1,nrowsA); lapack_int ldb_t = MAX(1,m); - lapack_int ldt_t = MAX(1,ldt); - lapack_int ldv_t = MAX(1,ldv); + lapack_int ldt_t = MAX(1,nb); + lapack_int ldv_t = MAX(1,nrowsV); lapack_complex_float* v_t = NULL; lapack_complex_float* t_t = NULL; lapack_complex_float* a_t = NULL; lapack_complex_float* b_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_ctpmqrt_work", info ); return info; @@ -69,7 +77,7 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_ctpmqrt_work", info ); return info; } - if( ldt < nb ) { + if( ldt < k ) { info = -12; LAPACKE_xerbla( "LAPACKE_ctpmqrt_work", info ); return info; @@ -87,13 +95,13 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_0; } t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,nb) ); + LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,k) ); if( t_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } a_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,m) ); + LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,ncolsA) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; @@ -105,10 +113,10 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_3; } /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_layout, ldv, k, v, ldv, v_t, ldv_t ); - LAPACKE_cge_trans( matrix_layout, ldt, nb, t, ldt, t_t, ldt_t ); - LAPACKE_cge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); - LAPACKE_cge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_cge_trans( LAPACK_ROW_MAJOR, nrowsV, k, v, ldv, v_t, ldv_t ); + LAPACKE_cge_trans( LAPACK_ROW_MAJOR, nb, k, t, ldt, t_t, ldt_t ); + LAPACKE_cge_trans( LAPACK_ROW_MAJOR, nrowsA, ncolsA, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( LAPACK_ROW_MAJOR, m, n, b, ldb, b_t, ldb_t ); /* Call LAPACK function and adjust info */ LAPACK_ctpmqrt( &side, &trans, &m, &n, &k, &l, &nb, v_t, &ldv_t, t_t, &ldt_t, a_t, &lda_t, b_t, &ldb_t, work, &info ); @@ -116,7 +124,7 @@ lapack_int LAPACKE_ctpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_cge_trans( LAPACK_COL_MAJOR, k, m, a_t, lda_t, a, lda ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrowsA, ncolsA, a_t, lda_t, a, lda ); LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); diff --git a/LAPACKE/src/lapacke_dlantr.c b/LAPACKE/src/lapacke_dlantr.c index 4d1be93d73..b20af0eb46 100644 --- a/LAPACKE/src/lapacke_dlantr.c +++ b/LAPACKE/src/lapacke_dlantr.c @@ -46,7 +46,7 @@ double LAPACKE_dlantr( int matrix_layout, char norm, char uplo, char diag, #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - if( LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + if( LAPACKE_dtz_nancheck( matrix_layout, 'f', uplo, diag, m, n, a, lda ) ) { return -7; } } diff --git a/LAPACKE/src/lapacke_dlarfb.c b/LAPACKE/src/lapacke_dlarfb.c index 3c3c24c54b..f4ddc62a58 100644 --- a/LAPACKE/src/lapacke_dlarfb.c +++ b/LAPACKE/src/lapacke_dlarfb.c @@ -41,7 +41,9 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct lapack_int info = 0; lapack_int ldwork; double* work = NULL; - lapack_int ncols_v, nrows_v; + lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dlarfb", -1 ); return -1; @@ -49,59 +51,27 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - lapack_int lrv, lcv; /* row, column stride */ - if( matrix_layout == LAPACK_COL_MAJOR ) { - lrv = 1; - lcv = ldv; - } else { - lrv = ldv; - lcv = 1; - } - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { + LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); + return -8; + } + if( LAPACKE_dtz_nancheck( matrix_layout, direct, uplo, 'u', + nrows_v, ncols_v, v, ldv ) ) { + return -9; } if( LAPACKE_dge_nancheck( matrix_layout, k, k, t, ldt ) ) { return -11; } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_dtr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v-k, ncols_v, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); - return -8; - } - if( LAPACKE_dtr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*lrv], ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_dtr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v-k, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); - return -8; - } - if( LAPACKE_dtr_nancheck( matrix_layout, 'l', 'u', k, - &v[(ncols_v-k)*lcv], ldv ) ) - return -9; - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } } #endif diff --git a/LAPACKE/src/lapacke_dlarfb_work.c b/LAPACKE/src/lapacke_dlarfb_work.c index 57c53bae31..de444c1466 100644 --- a/LAPACKE/src/lapacke_dlarfb_work.c +++ b/LAPACKE/src/lapacke_dlarfb_work.c @@ -41,6 +41,8 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, { lapack_int info = 0; lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; lapack_int ldc_t, ldt_t, ldv_t; double *v_t = NULL, *t_t = NULL, *c_t = NULL; if( matrix_layout == LAPACK_COL_MAJOR ) { @@ -51,16 +53,14 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); + + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + ldc_t = MAX(1,m); ldt_t = MAX(1,k); ldv_t = MAX(1,nrows_v); @@ -80,6 +80,11 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); return info; } + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_dlarfb_work", info ); + return info; + } /* Allocate memory for temporary array(s) */ v_t = (double*) LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,ncols_v) ); @@ -98,36 +103,8 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, goto exit_level_2; } /* Transpose input matrices */ - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_dtr_trans( matrix_layout, 'l', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_dge_trans( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], ldv, - &v_t[k], ldv_t ); - } else if( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb_work", -8 ); - return -8; - } - LAPACKE_dtr_trans( matrix_layout, 'u', 'u', k, &v[(nrows_v-k)*ldv], - ldv, &v_t[nrows_v-k], ldv_t ); - LAPACKE_dge_trans( matrix_layout, nrows_v-k, ncols_v, v, ldv, v_t, - ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_dtr_trans( matrix_layout, 'u', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_dge_trans( matrix_layout, nrows_v, ncols_v-k, &v[k], ldv, - &v_t[k*ldv_t], ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_dlarfb_work", -8 ); - return -8; - } - LAPACKE_dtr_trans( matrix_layout, 'l', 'u', k, &v[ncols_v-k], ldv, - &v_t[(ncols_v-k)*ldv_t], ldv_t ); - LAPACKE_dge_trans( matrix_layout, nrows_v, ncols_v-k, v, ldv, v_t, - ldv_t ); - } + LAPACKE_dtz_trans( matrix_layout, direct, uplo, 'u', nrows_v, ncols_v, + v, ldv, v_t, ldv_t ); LAPACKE_dge_trans( matrix_layout, k, k, t, ldt, t_t, ldt_t ); LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ diff --git a/LAPACKE/src/lapacke_dtgsen.c b/LAPACKE/src/lapacke_dtgsen.c index 11496c1c0f..883a795798 100644 --- a/LAPACKE/src/lapacke_dtgsen.c +++ b/LAPACKE/src/lapacke_dtgsen.c @@ -83,12 +83,10 @@ lapack_int LAPACKE_dtgsen( int matrix_layout, lapack_int ijob, liwork = iwork_query; lwork = (lapack_int)work_query; /* Allocate memory for work arrays */ - if( ijob != 0 ) { - iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if( iwork == NULL ) { - info = LAPACK_WORK_MEMORY_ERROR; - goto exit_level_0; - } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; } work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); if( work == NULL ) { @@ -103,9 +101,7 @@ lapack_int LAPACKE_dtgsen( int matrix_layout, lapack_int ijob, /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: - if( ijob != 0 ) { - LAPACKE_free( iwork ); - } + LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_dtgsen", info ); diff --git a/LAPACKE/src/lapacke_dtpmqrt_work.c b/LAPACKE/src/lapacke_dtpmqrt_work.c index d9ee6226be..366acd3690 100644 --- a/LAPACKE/src/lapacke_dtpmqrt_work.c +++ b/LAPACKE/src/lapacke_dtpmqrt_work.c @@ -48,16 +48,24 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,k); + lapack_int nrowsA, ncolsA, nrowsV; + if ( side == LAPACKE_lsame(side, 'l') ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == LAPACKE_lsame(side, 'r') ) { nrowsA = m; ncolsA = k; nrowsV = n; } + else { + info = -2; + LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); + return info; + } + lapack_int lda_t = MAX(1,nrowsA); lapack_int ldb_t = MAX(1,m); - lapack_int ldt_t = MAX(1,ldt); - lapack_int ldv_t = MAX(1,ldv); + lapack_int ldt_t = MAX(1,nb); + lapack_int ldv_t = MAX(1,nrowsV); double* v_t = NULL; double* t_t = NULL; double* a_t = NULL; double* b_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); return info; @@ -67,7 +75,7 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); return info; } - if( ldt < nb ) { + if( ldt < k ) { info = -12; LAPACKE_xerbla( "LAPACKE_dtpmqrt_work", info ); return info; @@ -83,12 +91,12 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; } - t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,nb) ); + t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,k) ); if( t_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } - a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,m) ); + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,ncolsA) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; @@ -99,10 +107,10 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_3; } /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_layout, ldv, k, v, ldv, v_t, ldv_t ); - LAPACKE_dge_trans( matrix_layout, ldt, nb, t, ldt, t_t, ldt_t ); - LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); - LAPACKE_dge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_dge_trans( LAPACK_ROW_MAJOR, nrowsV, k, v, ldv, v_t, ldv_t ); + LAPACKE_dge_trans( LAPACK_ROW_MAJOR, nb, k, t, ldt, t_t, ldt_t ); + LAPACKE_dge_trans( LAPACK_ROW_MAJOR, nrowsA, ncolsA, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( LAPACK_ROW_MAJOR, m, n, b, ldb, b_t, ldb_t ); /* Call LAPACK function and adjust info */ LAPACK_dtpmqrt( &side, &trans, &m, &n, &k, &l, &nb, v_t, &ldv_t, t_t, &ldt_t, a_t, &lda_t, b_t, &ldb_t, work, &info ); @@ -110,7 +118,7 @@ lapack_int LAPACKE_dtpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_dge_trans( LAPACK_COL_MAJOR, k, m, a_t, lda_t, a, lda ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrowsA, ncolsA, a_t, lda_t, a, lda ); LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); diff --git a/LAPACKE/src/lapacke_dtrsyl3.c b/LAPACKE/src/lapacke_dtrsyl3.c index 523235c93a..c95a772deb 100644 --- a/LAPACKE/src/lapacke_dtrsyl3.c +++ b/LAPACKE/src/lapacke_dtrsyl3.c @@ -11,6 +11,7 @@ lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb, double* swork = NULL; lapack_int ldswork = -1; lapack_int swork_size = -1; + lapack_int iwork_query; lapack_int* iwork = NULL; lapack_int liwork = -1; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -33,7 +34,7 @@ lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb, #endif /* Query optimal working array sizes */ info = LAPACKE_dtrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, - b, ldb, c, ldc, scale, iwork, liwork, + b, ldb, c, ldc, scale, &iwork_query, liwork, swork_query, ldswork ); if( info != 0 ) { goto exit_level_0; @@ -45,8 +46,9 @@ lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb, info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } + liwork = iwork_query; iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if (iwork == NULL ) { + if ( iwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; } diff --git a/LAPACKE/src/lapacke_slantr.c b/LAPACKE/src/lapacke_slantr.c index 2f4c65889a..e2f67cfd6c 100644 --- a/LAPACKE/src/lapacke_slantr.c +++ b/LAPACKE/src/lapacke_slantr.c @@ -46,7 +46,7 @@ float LAPACKE_slantr( int matrix_layout, char norm, char uplo, char diag, #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - if( LAPACKE_str_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + if( LAPACKE_stz_nancheck( matrix_layout, 'f', uplo, diag, m, n, a, lda ) ) { return -7; } } diff --git a/LAPACKE/src/lapacke_slarfb.c b/LAPACKE/src/lapacke_slarfb.c index 37d51dee58..d36958f93d 100644 --- a/LAPACKE/src/lapacke_slarfb.c +++ b/LAPACKE/src/lapacke_slarfb.c @@ -41,7 +41,9 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct lapack_int info = 0; lapack_int ldwork; float* work = NULL; - lapack_int ncols_v, nrows_v; + lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_slarfb", -1 ); return -1; @@ -49,59 +51,27 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - lapack_int lrv, lcv; /* row, column stride */ - if( matrix_layout == LAPACK_COL_MAJOR ) { - lrv = 1; - lcv = ldv; - } else { - lrv = ldv; - lcv = 1; - } - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { + LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); + return -8; + } + if( LAPACKE_stz_nancheck( matrix_layout, direct, uplo, 'u', + nrows_v, ncols_v, v, ldv ) ) { + return -9; } if( LAPACKE_sge_nancheck( matrix_layout, k, k, t, ldt ) ) { return -11; } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_str_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v-k, ncols_v, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); - return -8; - } - if( LAPACKE_str_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*lrv], ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_str_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v-k, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); - return -8; - } - if( LAPACKE_str_nancheck( matrix_layout, 'l', 'u', k, - &v[(ncols_v-k)*lcv], ldv ) ) - return -9; - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } } #endif diff --git a/LAPACKE/src/lapacke_slarfb_work.c b/LAPACKE/src/lapacke_slarfb_work.c index 2f5d616767..8b61276337 100644 --- a/LAPACKE/src/lapacke_slarfb_work.c +++ b/LAPACKE/src/lapacke_slarfb_work.c @@ -41,6 +41,8 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, { lapack_int info = 0; lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; lapack_int ldc_t, ldt_t, ldv_t; float *v_t = NULL, *t_t = NULL, *c_t = NULL; if( matrix_layout == LAPACK_COL_MAJOR ) { @@ -51,16 +53,14 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); + + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + ldc_t = MAX(1,m); ldt_t = MAX(1,k); ldv_t = MAX(1,nrows_v); @@ -80,6 +80,11 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); return info; } + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_slarfb_work", info ); + return info; + } /* Allocate memory for temporary array(s) */ v_t = (float*)LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,ncols_v) ); if( v_t == NULL ) { @@ -97,36 +102,8 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, goto exit_level_2; } /* Transpose input matrices */ - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_str_trans( matrix_layout, 'l', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_sge_trans( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], ldv, - &v_t[k], ldv_t ); - } else if( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb_work", -8 ); - return -8; - } - LAPACKE_str_trans( matrix_layout, 'u', 'u', k, &v[(nrows_v-k)*ldv], - ldv, &v_t[nrows_v-k], ldv_t ); - LAPACKE_sge_trans( matrix_layout, nrows_v-k, ncols_v, v, ldv, v_t, - ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_str_trans( matrix_layout, 'u', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_sge_trans( matrix_layout, nrows_v, ncols_v-k, &v[k], ldv, - &v_t[k*ldv_t], ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_slarfb_work", -8 ); - return -8; - } - LAPACKE_str_trans( matrix_layout, 'l', 'u', k, &v[ncols_v-k], ldv, - &v_t[(ncols_v-k)*ldv_t], ldv_t ); - LAPACKE_sge_trans( matrix_layout, nrows_v, ncols_v-k, v, ldv, v_t, - ldv_t ); - } + LAPACKE_stz_trans( matrix_layout, direct, uplo, 'u', nrows_v, ncols_v, + v, ldv, v_t, ldv_t ); LAPACKE_sge_trans( matrix_layout, k, k, t, ldt, t_t, ldt_t ); LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ diff --git a/LAPACKE/src/lapacke_stgsen.c b/LAPACKE/src/lapacke_stgsen.c index c3b0c4bf82..db5b7e91c1 100644 --- a/LAPACKE/src/lapacke_stgsen.c +++ b/LAPACKE/src/lapacke_stgsen.c @@ -83,12 +83,10 @@ lapack_int LAPACKE_stgsen( int matrix_layout, lapack_int ijob, liwork = iwork_query; lwork = (lapack_int)work_query; /* Allocate memory for work arrays */ - if( ijob != 0 ) { - iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if( iwork == NULL ) { - info = LAPACK_WORK_MEMORY_ERROR; - goto exit_level_0; - } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; } work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); if( work == NULL ) { @@ -103,9 +101,7 @@ lapack_int LAPACKE_stgsen( int matrix_layout, lapack_int ijob, /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: - if( ijob != 0 ) { - LAPACKE_free( iwork ); - } + LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_stgsen", info ); diff --git a/LAPACKE/src/lapacke_stpmqrt_work.c b/LAPACKE/src/lapacke_stpmqrt_work.c index 095fbdcd9f..c5a3a14965 100644 --- a/LAPACKE/src/lapacke_stpmqrt_work.c +++ b/LAPACKE/src/lapacke_stpmqrt_work.c @@ -48,16 +48,24 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,k); + lapack_int nrowsA, ncolsA, nrowsV; + if ( side == LAPACKE_lsame(side, 'l') ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == LAPACKE_lsame(side, 'r') ) { nrowsA = m; ncolsA = k; nrowsV = n; } + else { + info = -2; + LAPACKE_xerbla( "LAPACKE_stpmqrt_work", info ); + return info; + } + lapack_int lda_t = MAX(1,nrowsA); lapack_int ldb_t = MAX(1,m); - lapack_int ldt_t = MAX(1,ldt); - lapack_int ldv_t = MAX(1,ldv); + lapack_int ldt_t = MAX(1,nb); + lapack_int ldv_t = MAX(1,nrowsV); float* v_t = NULL; float* t_t = NULL; float* a_t = NULL; float* b_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_stpmqrt_work", info ); return info; @@ -67,7 +75,7 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_stpmqrt_work", info ); return info; } - if( ldt < nb ) { + if( ldt < k ) { info = -12; LAPACKE_xerbla( "LAPACKE_stpmqrt_work", info ); return info; @@ -83,12 +91,12 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; } - t_t = (float*)LAPACKE_malloc( sizeof(float) * ldt_t * MAX(1,nb) ); + t_t = (float*)LAPACKE_malloc( sizeof(float) * ldt_t * MAX(1,k) ); if( t_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } - a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) ); + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,ncolsA) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; @@ -99,10 +107,10 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_3; } /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_layout, ldv, k, v, ldv, v_t, ldv_t ); - LAPACKE_sge_trans( matrix_layout, ldt, nb, t, ldt, t_t, ldt_t ); - LAPACKE_sge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); - LAPACKE_sge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_sge_trans( LAPACK_ROW_MAJOR, nrowsV, k, v, ldv, v_t, ldv_t ); + LAPACKE_sge_trans( LAPACK_ROW_MAJOR, nb, k, t, ldt, t_t, ldt_t ); + LAPACKE_sge_trans( LAPACK_ROW_MAJOR, nrowsA, ncolsA, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( LAPACK_ROW_MAJOR, m, n, b, ldb, b_t, ldb_t ); /* Call LAPACK function and adjust info */ LAPACK_stpmqrt( &side, &trans, &m, &n, &k, &l, &nb, v_t, &ldv_t, t_t, &ldt_t, a_t, &lda_t, b_t, &ldb_t, work, &info ); @@ -110,7 +118,7 @@ lapack_int LAPACKE_stpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_sge_trans( LAPACK_COL_MAJOR, k, m, a_t, lda_t, a, lda ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrowsA, ncolsA, a_t, lda_t, a, lda ); LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); diff --git a/LAPACKE/src/lapacke_strsyl3.c b/LAPACKE/src/lapacke_strsyl3.c index 6db54f21f5..1cfc626c22 100644 --- a/LAPACKE/src/lapacke_strsyl3.c +++ b/LAPACKE/src/lapacke_strsyl3.c @@ -11,6 +11,7 @@ lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb, float* swork = NULL; lapack_int ldswork = -1; lapack_int swork_size = -1; + lapack_int iwork_query; lapack_int* iwork = NULL; lapack_int liwork = -1; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { @@ -33,7 +34,7 @@ lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb, #endif /* Query optimal working array sizes */ info = LAPACKE_strsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, - b, ldb, c, ldc, scale, iwork, liwork, + b, ldb, c, ldc, scale, &iwork_query, liwork, swork_query, ldswork ); if( info != 0 ) { goto exit_level_0; @@ -45,8 +46,9 @@ lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb, info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } + liwork = iwork_query; iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if (iwork == NULL ) { + if ( iwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; } diff --git a/LAPACKE/src/lapacke_zlantr.c b/LAPACKE/src/lapacke_zlantr.c index f6656d84da..4c078b9b0f 100644 --- a/LAPACKE/src/lapacke_zlantr.c +++ b/LAPACKE/src/lapacke_zlantr.c @@ -46,7 +46,7 @@ double LAPACKE_zlantr( int matrix_layout, char norm, char uplo, char diag, #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - if( LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, MIN(m,n), a, lda ) ) { + if( LAPACKE_ztz_nancheck( matrix_layout, 'f', uplo, diag, m, n, a, lda ) ) { return -7; } } diff --git a/LAPACKE/src/lapacke_zlarfb.c b/LAPACKE/src/lapacke_zlarfb.c index 7cd23dde8f..85355b202d 100644 --- a/LAPACKE/src/lapacke_zlarfb.c +++ b/LAPACKE/src/lapacke_zlarfb.c @@ -42,7 +42,9 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct lapack_int info = 0; lapack_int ldwork; lapack_complex_double* work = NULL; - lapack_int ncols_v, nrows_v; + lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zlarfb", -1 ); return -1; @@ -50,59 +52,27 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ - lapack_int lrv, lcv; /* row, column stride */ - if( matrix_layout == LAPACK_COL_MAJOR ) { - lrv = 1; - lcv = ldv; - } else { - lrv = ldv; - lcv = 1; - } - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { - return -13; + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { + LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); + return -8; + } + if( LAPACKE_ztz_nancheck( matrix_layout, direct, uplo, 'u', + nrows_v, ncols_v, v, ldv ) ) { + return -9; } if( LAPACKE_zge_nancheck( matrix_layout, k, k, t, ldt ) ) { return -11; } - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ztr_nancheck( matrix_layout, 'l', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v-k, ncols_v, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); - return -8; - } - if( LAPACKE_ztr_nancheck( matrix_layout, 'u', 'u', k, - &v[(nrows_v-k)*lrv], ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v-k, ncols_v, v, ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'f' ) ) { - if( LAPACKE_ztr_nancheck( matrix_layout, 'u', 'u', k, v, ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v-k, - &v[k*lrv], ldv ) ) - return -9; - } else if( LAPACKE_lsame( storev, 'r' ) && LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); - return -8; - } - if( LAPACKE_ztr_nancheck( matrix_layout, 'l', 'u', k, - &v[(ncols_v-k)*lcv], ldv ) ) - return -9; - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v-k, v, ldv ) ) - return -9; + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -13; } } #endif diff --git a/LAPACKE/src/lapacke_zlarfb_work.c b/LAPACKE/src/lapacke_zlarfb_work.c index 1b4f892a17..72d85ec82a 100644 --- a/LAPACKE/src/lapacke_zlarfb_work.c +++ b/LAPACKE/src/lapacke_zlarfb_work.c @@ -42,6 +42,8 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, { lapack_int info = 0; lapack_int nrows_v, ncols_v; + lapack_logical left, col, forward; + char uplo; lapack_int ldc_t, ldt_t, ldv_t; lapack_complex_double *v_t = NULL, *t_t = NULL, *c_t = NULL; if( matrix_layout == LAPACK_COL_MAJOR ) { @@ -52,16 +54,14 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - nrows_v = ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : - ( LAPACKE_lsame( storev, 'r' ) ? k : 1) ); - ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'l' ) ) ? m : - ( ( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( side, 'r' ) ) ? n : 1) ); + left = LAPACKE_lsame( side, 'l' ); + col = LAPACKE_lsame( storev, 'c' ); + forward = LAPACKE_lsame( direct, 'f' ); + + nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); + ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); + uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + ldc_t = MAX(1,m); ldt_t = MAX(1,k); ldv_t = MAX(1,nrows_v); @@ -81,6 +81,11 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); return info; } + if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zlarfb_work", info ); + return info; + } /* Allocate memory for temporary array(s) */ v_t = (lapack_complex_double*) LAPACKE_malloc( sizeof(lapack_complex_double) * @@ -102,36 +107,8 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, goto exit_level_2; } /* Transpose input matrices */ - if( LAPACKE_lsame( storev, 'c' ) && LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_ztr_trans( matrix_layout, 'l', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_zge_trans( matrix_layout, nrows_v-k, ncols_v, &v[k*ldv], ldv, - &v_t[k], ldv_t ); - } else if( LAPACKE_lsame( storev, 'c' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > nrows_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb_work", -8 ); - return -8; - } - LAPACKE_ztr_trans( matrix_layout, 'u', 'u', k, &v[(nrows_v-k)*ldv], - ldv, &v_t[nrows_v-k], ldv_t ); - LAPACKE_zge_trans( matrix_layout, nrows_v-k, ncols_v, v, ldv, v_t, - ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'f' ) ) { - LAPACKE_ztr_trans( matrix_layout, 'u', 'u', k, v, ldv, v_t, ldv_t ); - LAPACKE_zge_trans( matrix_layout, nrows_v, ncols_v-k, &v[k], ldv, - &v_t[k*ldv_t], ldv_t ); - } else if( LAPACKE_lsame( storev, 'r' ) && - LAPACKE_lsame( direct, 'b' ) ) { - if( k > ncols_v ) { - LAPACKE_xerbla( "LAPACKE_zlarfb_work", -8 ); - return -8; - } - LAPACKE_ztr_trans( matrix_layout, 'l', 'u', k, &v[ncols_v-k], ldv, - &v_t[(ncols_v-k)*ldv_t], ldv_t ); - LAPACKE_zge_trans( matrix_layout, nrows_v, ncols_v-k, v, ldv, v_t, - ldv_t ); - } + LAPACKE_ztz_trans( matrix_layout, direct, uplo, 'u', nrows_v, ncols_v, + v, ldv, v_t, ldv_t ); LAPACKE_zge_trans( matrix_layout, k, k, t, ldt, t_t, ldt_t ); LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ diff --git a/LAPACKE/src/lapacke_ztgsen.c b/LAPACKE/src/lapacke_ztgsen.c index 8c86d5e00e..039da18b48 100644 --- a/LAPACKE/src/lapacke_ztgsen.c +++ b/LAPACKE/src/lapacke_ztgsen.c @@ -86,12 +86,10 @@ lapack_int LAPACKE_ztgsen( int matrix_layout, lapack_int ijob, liwork = iwork_query; lwork = LAPACK_Z2INT( work_query ); /* Allocate memory for work arrays */ - if( ijob != 0 ) { - iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); - if( iwork == NULL ) { - info = LAPACK_WORK_MEMORY_ERROR; - goto exit_level_0; - } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; } work = (lapack_complex_double*) LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); @@ -106,9 +104,7 @@ lapack_int LAPACKE_ztgsen( int matrix_layout, lapack_int ijob, /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: - if( ijob != 0 ) { - LAPACKE_free( iwork ); - } + LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_ztgsen", info ); diff --git a/LAPACKE/src/lapacke_ztpmqrt_work.c b/LAPACKE/src/lapacke_ztpmqrt_work.c index 643ae1d9d5..104efa8f3c 100644 --- a/LAPACKE/src/lapacke_ztpmqrt_work.c +++ b/LAPACKE/src/lapacke_ztpmqrt_work.c @@ -50,16 +50,24 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,k); + lapack_int nrowsA, ncolsA, nrowsV; + if ( side == LAPACKE_lsame(side, 'l') ) { nrowsA = k; ncolsA = n; nrowsV = m; } + else if ( side == LAPACKE_lsame(side, 'r') ) { nrowsA = m; ncolsA = k; nrowsV = n; } + else { + info = -2; + LAPACKE_xerbla( "LAPACKE_ztpmqrt_work", info ); + return info; + } + lapack_int lda_t = MAX(1,nrowsA); lapack_int ldb_t = MAX(1,m); - lapack_int ldt_t = MAX(1,ldt); - lapack_int ldv_t = MAX(1,ldv); + lapack_int ldt_t = MAX(1,nb); + lapack_int ldv_t = MAX(1,nrowsV); lapack_complex_double* v_t = NULL; lapack_complex_double* t_t = NULL; lapack_complex_double* a_t = NULL; lapack_complex_double* b_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < ncolsA ) { info = -14; LAPACKE_xerbla( "LAPACKE_ztpmqrt_work", info ); return info; @@ -69,7 +77,7 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, LAPACKE_xerbla( "LAPACKE_ztpmqrt_work", info ); return info; } - if( ldt < nb ) { + if( ldt < k ) { info = -12; LAPACKE_xerbla( "LAPACKE_ztpmqrt_work", info ); return info; @@ -87,13 +95,13 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_0; } t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * ldt_t * MAX(1,nb) ); + LAPACKE_malloc( sizeof(lapack_complex_double) * ldt_t * MAX(1,k) ); if( t_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } a_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,m) ); + LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,ncolsA) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; @@ -105,10 +113,10 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, goto exit_level_3; } /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_layout, ldv, k, v, ldv, v_t, ldv_t ); - LAPACKE_zge_trans( matrix_layout, ldt, nb, t, ldt, t_t, ldt_t ); - LAPACKE_zge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); - LAPACKE_zge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_zge_trans( LAPACK_ROW_MAJOR, nrowsV, k, v, ldv, v_t, ldv_t ); + LAPACKE_zge_trans( LAPACK_ROW_MAJOR, nb, k, t, ldt, t_t, ldt_t ); + LAPACKE_zge_trans( LAPACK_ROW_MAJOR, nrowsA, ncolsA, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( LAPACK_ROW_MAJOR, m, n, b, ldb, b_t, ldb_t ); /* Call LAPACK function and adjust info */ LAPACK_ztpmqrt( &side, &trans, &m, &n, &k, &l, &nb, v_t, &ldv_t, t_t, &ldt_t, a_t, &lda_t, b_t, &ldb_t, work, &info ); @@ -116,7 +124,7 @@ lapack_int LAPACKE_ztpmqrt_work( int matrix_layout, char side, char trans, info = info - 1; } /* Transpose output matrices */ - LAPACKE_zge_trans( LAPACK_COL_MAJOR, k, m, a_t, lda_t, a, lda ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrowsA, ncolsA, a_t, lda_t, a, lda ); LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); /* Release memory and exit */ LAPACKE_free( b_t ); diff --git a/LAPACKE/utils/CMakeLists.txt b/LAPACKE/utils/CMakeLists.txt index dd36ee33e7..dfb9aa3702 100644 --- a/LAPACKE/utils/CMakeLists.txt +++ b/LAPACKE/utils/CMakeLists.txt @@ -1,39 +1,46 @@ set(UTILS -lapacke_c_nancheck.c lapacke_ctr_trans.c lapacke_make_complex_float.c lapacke_zgb_nancheck.c -lapacke_cgb_nancheck.c lapacke_d_nancheck.c lapacke_s_nancheck.c lapacke_zgb_trans.c -lapacke_cgb_trans.c lapacke_dgb_nancheck.c lapacke_sgb_nancheck.c lapacke_zge_nancheck.c -lapacke_cge_nancheck.c lapacke_dgb_trans.c lapacke_sgb_trans.c lapacke_zge_trans.c -lapacke_cge_trans.c lapacke_dge_nancheck.c lapacke_sge_nancheck.c lapacke_zgg_nancheck.c -lapacke_cgg_nancheck.c lapacke_dge_trans.c lapacke_sge_trans.c lapacke_zgg_trans.c -lapacke_cgg_trans.c lapacke_dgg_nancheck.c lapacke_sgg_nancheck.c lapacke_zgt_nancheck.c -lapacke_cgt_nancheck.c lapacke_dgg_trans.c lapacke_sgg_trans.c lapacke_zhb_nancheck.c -lapacke_chb_nancheck.c lapacke_dgt_nancheck.c lapacke_sgt_nancheck.c lapacke_zhb_trans.c -lapacke_chb_trans.c lapacke_dhs_nancheck.c lapacke_shs_nancheck.c lapacke_zhe_nancheck.c -lapacke_che_nancheck.c lapacke_dhs_trans.c lapacke_shs_trans.c lapacke_zhe_trans.c -lapacke_che_trans.c lapacke_dpb_nancheck.c lapacke_spb_nancheck.c lapacke_zhp_nancheck.c -lapacke_chp_nancheck.c lapacke_dpb_trans.c lapacke_spb_trans.c lapacke_zhp_trans.c -lapacke_chp_trans.c lapacke_dpf_nancheck.c lapacke_spf_nancheck.c lapacke_zhs_nancheck.c -lapacke_chs_nancheck.c lapacke_dpf_trans.c lapacke_spf_trans.c lapacke_zhs_trans.c -lapacke_chs_trans.c lapacke_dpo_nancheck.c lapacke_spo_nancheck.c lapacke_zpb_nancheck.c -lapacke_cpb_nancheck.c lapacke_dpo_trans.c lapacke_spo_trans.c lapacke_zpb_trans.c -lapacke_cpb_trans.c lapacke_dpp_nancheck.c lapacke_spp_nancheck.c lapacke_zpf_nancheck.c -lapacke_cpf_nancheck.c lapacke_dpp_trans.c lapacke_spp_trans.c lapacke_zpf_trans.c -lapacke_cpf_trans.c lapacke_dpt_nancheck.c lapacke_spt_nancheck.c lapacke_zpo_nancheck.c -lapacke_cpo_nancheck.c lapacke_dsb_nancheck.c lapacke_ssb_nancheck.c lapacke_zpo_trans.c -lapacke_cpo_trans.c lapacke_dsb_trans.c lapacke_ssb_trans.c lapacke_zpp_nancheck.c -lapacke_cpp_nancheck.c lapacke_dsp_nancheck.c lapacke_ssp_nancheck.c lapacke_zpp_trans.c -lapacke_cpp_trans.c lapacke_dsp_trans.c lapacke_ssp_trans.c lapacke_zpt_nancheck.c -lapacke_cpt_nancheck.c lapacke_dst_nancheck.c lapacke_sst_nancheck.c lapacke_zsp_nancheck.c -lapacke_csp_nancheck.c lapacke_dsy_nancheck.c lapacke_ssy_nancheck.c lapacke_zsp_trans.c -lapacke_csp_trans.c lapacke_dsy_trans.c lapacke_ssy_trans.c lapacke_zst_nancheck.c -lapacke_cst_nancheck.c lapacke_dtb_nancheck.c lapacke_stb_nancheck.c lapacke_zsy_nancheck.c -lapacke_csy_nancheck.c lapacke_dtb_trans.c lapacke_stb_trans.c lapacke_zsy_trans.c -lapacke_csy_trans.c lapacke_dtf_nancheck.c lapacke_stf_nancheck.c lapacke_ztb_nancheck.c -lapacke_ctb_nancheck.c lapacke_dtf_trans.c lapacke_stf_trans.c lapacke_ztb_trans.c -lapacke_ctb_trans.c lapacke_dtp_nancheck.c lapacke_stp_nancheck.c lapacke_ztf_nancheck.c -lapacke_ctf_nancheck.c lapacke_dtp_trans.c lapacke_stp_trans.c lapacke_ztf_trans.c -lapacke_ctf_trans.c lapacke_dtr_nancheck.c lapacke_str_nancheck.c lapacke_ztp_nancheck.c -lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c -lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c -lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c +lapacke_c_nancheck.c lapacke_d_nancheck.c lapacke_s_nancheck.c lapacke_z_nancheck.c +lapacke_cgb_nancheck.c lapacke_dgb_nancheck.c lapacke_sgb_nancheck.c lapacke_zgb_trans.c +lapacke_cgb_trans.c lapacke_dgb_trans.c lapacke_sgb_trans.c lapacke_zgb_nancheck.c +lapacke_cge_nancheck.c lapacke_dge_nancheck.c lapacke_sge_nancheck.c lapacke_zge_nancheck.c +lapacke_cge_trans.c lapacke_dge_trans.c lapacke_sge_trans.c lapacke_zge_trans.c +lapacke_cgg_nancheck.c lapacke_dgg_nancheck.c lapacke_sgg_nancheck.c lapacke_zgg_nancheck.c +lapacke_cgg_trans.c lapacke_dgg_trans.c lapacke_sgg_trans.c lapacke_zgg_trans.c +lapacke_cgt_nancheck.c lapacke_dgt_nancheck.c lapacke_sgt_nancheck.c lapacke_zgt_nancheck.c +lapacke_chb_nancheck.c lapacke_dsb_nancheck.c lapacke_ssb_nancheck.c lapacke_zhb_nancheck.c +lapacke_chb_trans.c lapacke_dsb_trans.c lapacke_ssb_trans.c lapacke_zhb_trans.c +lapacke_che_nancheck.c lapacke_zhe_nancheck.c +lapacke_che_trans.c lapacke_zhe_trans.c +lapacke_chp_nancheck.c lapacke_zhp_nancheck.c +lapacke_chp_trans.c lapacke_zhp_trans.c +lapacke_chs_nancheck.c lapacke_dhs_nancheck.c lapacke_shs_nancheck.c lapacke_zhs_nancheck.c +lapacke_chs_trans.c lapacke_dhs_trans.c lapacke_shs_trans.c lapacke_zhs_trans.c +lapacke_cpb_nancheck.c lapacke_dpb_nancheck.c lapacke_spb_nancheck.c lapacke_zpb_nancheck.c +lapacke_cpb_trans.c lapacke_dpb_trans.c lapacke_spb_trans.c lapacke_zpb_trans.c +lapacke_cpf_nancheck.c lapacke_dpf_nancheck.c lapacke_spf_nancheck.c lapacke_zpf_nancheck.c +lapacke_cpf_trans.c lapacke_dpf_trans.c lapacke_spf_trans.c lapacke_zpf_trans.c +lapacke_cpo_nancheck.c lapacke_dpo_nancheck.c lapacke_spo_nancheck.c lapacke_zpo_nancheck.c +lapacke_cpo_trans.c lapacke_dpo_trans.c lapacke_spo_trans.c lapacke_zpo_trans.c +lapacke_cpp_nancheck.c lapacke_dpp_nancheck.c lapacke_spp_nancheck.c lapacke_zpp_nancheck.c +lapacke_cpp_trans.c lapacke_dpp_trans.c lapacke_spp_trans.c lapacke_zpp_trans.c +lapacke_cpt_nancheck.c lapacke_dpt_nancheck.c lapacke_spt_nancheck.c lapacke_zpt_nancheck.c +lapacke_csp_nancheck.c lapacke_dsp_nancheck.c lapacke_ssp_nancheck.c lapacke_zsp_nancheck.c +lapacke_csp_trans.c lapacke_dsp_trans.c lapacke_ssp_trans.c lapacke_zsp_trans.c +lapacke_cst_nancheck.c lapacke_dst_nancheck.c lapacke_sst_nancheck.c lapacke_zst_nancheck.c +lapacke_csy_nancheck.c lapacke_dsy_nancheck.c lapacke_ssy_nancheck.c lapacke_zsy_nancheck.c +lapacke_csy_trans.c lapacke_dsy_trans.c lapacke_ssy_trans.c lapacke_zsy_trans.c +lapacke_ctb_nancheck.c lapacke_dtb_nancheck.c lapacke_stb_nancheck.c lapacke_ztb_nancheck.c +lapacke_ctb_trans.c lapacke_dtb_trans.c lapacke_stb_trans.c lapacke_ztb_trans.c +lapacke_ctf_nancheck.c lapacke_dtf_nancheck.c lapacke_stf_nancheck.c lapacke_ztf_nancheck.c +lapacke_ctf_trans.c lapacke_dtf_trans.c lapacke_stf_trans.c lapacke_ztf_trans.c +lapacke_ctp_nancheck.c lapacke_dtp_nancheck.c lapacke_stp_nancheck.c lapacke_ztp_nancheck.c +lapacke_ctp_trans.c lapacke_dtp_trans.c lapacke_stp_trans.c lapacke_ztp_trans.c +lapacke_ctr_nancheck.c lapacke_dtr_nancheck.c lapacke_str_nancheck.c lapacke_ztr_nancheck.c +lapacke_ctr_trans.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztr_trans.c +lapacke_ctz_nancheck.c lapacke_dtz_nancheck.c lapacke_stz_nancheck.c lapacke_ztz_nancheck.c +lapacke_ctz_trans.c lapacke_dtz_trans.c lapacke_stz_trans.c lapacke_ztz_trans.c + +lapacke_make_complex_float.c lapacke_make_complex_double.c +lapacke_lsame.c +lapacke_xerbla.c ) diff --git a/LAPACKE/utils/Makefile b/LAPACKE/utils/Makefile index adc5736507..a1f8631071 100644 --- a/LAPACKE/utils/Makefile +++ b/LAPACKE/utils/Makefile @@ -76,6 +76,8 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_ctp_trans.o \ lapacke_ctr_nancheck.o \ lapacke_ctr_trans.o \ + lapacke_ctz_nancheck.o \ + lapacke_ctz_trans.o \ lapacke_dgb_nancheck.o \ lapacke_dgb_trans.o \ lapacke_dge_nancheck.o \ @@ -110,6 +112,8 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_dtp_trans.o \ lapacke_dtr_nancheck.o \ lapacke_dtr_trans.o \ + lapacke_dtz_nancheck.o \ + lapacke_dtz_trans.o \ lapacke_lsame.o \ lapacke_sgb_nancheck.o \ lapacke_sgb_trans.o \ @@ -145,6 +149,8 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_stp_trans.o \ lapacke_str_nancheck.o \ lapacke_str_trans.o \ + lapacke_stz_nancheck.o \ + lapacke_stz_trans.o \ lapacke_xerbla.o \ lapacke_zgb_nancheck.o \ lapacke_zgb_trans.o \ @@ -184,6 +190,8 @@ OBJ = lapacke_cgb_nancheck.o \ lapacke_ztp_trans.o \ lapacke_ztr_nancheck.o \ lapacke_ztr_trans.o \ + lapacke_ztz_nancheck.o \ + lapacke_ztz_trans.o \ lapacke_make_complex_float.o \ lapacke_make_complex_double.o diff --git a/LAPACKE/utils/lapacke_ctz_nancheck.c b/LAPACKE/utils/lapacke_ctz_nancheck.c new file mode 100644 index 0000000000..bea9567811 --- /dev/null +++ b/LAPACKE/utils/lapacke_ctz_nancheck.c @@ -0,0 +1,144 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Check a trapezoidal matrix for NaN entries. The shape of the trapezoidal + matrix is determined by the arguments `direct` and `uplo`. `Direct` chooses + the diagonal which shall be considered and `uplo` tells us whether we use the + upper or lower part of the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float *a, + lapack_int lda ) +{ + lapack_logical colmaj, front, lower, unit; + + if( a == NULL ) return (lapack_logical) 0; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return (lapack_logical) 0; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_offset = tri_n * ( !colmaj ? lda : 1 ); + } else if( !lower && n > m ) { + rect_offset = tri_n * ( colmaj ? lda : 1 ); + } + } else { + if( m > n ) { + tri_offset = rect_m * ( !colmaj ? lda : 1 ); + if( !lower ) { + rect_offset = 0; + } + } else if( n > m ) { + tri_offset = rect_n * ( colmaj ? lda : 1 ); + if( lower ) { + rect_offset = 0; + } + } + } + + /* Check rectangular part */ + if( rect_offset >= 0 ) { + if( LAPACKE_cge_nancheck( matrix_layout, rect_m, rect_n, + &a[rect_offset], lda) ) { + return (lapack_logical) 1; + } + } + + /* Check triangular part */ + return LAPACKE_ctr_nancheck( matrix_layout, uplo, diag, tri_n, + &a[tri_offset], lda ); +} diff --git a/LAPACKE/utils/lapacke_ctz_trans.c b/LAPACKE/utils/lapacke_ctz_trans.c new file mode 100644 index 0000000000..0abe03d283 --- /dev/null +++ b/LAPACKE/utils/lapacke_ctz_trans.c @@ -0,0 +1,153 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Converts input triangular matrix from row-major(C) to column-major(Fortran) + layout or vice versa. The shape of the trapezoidal matrix is determined by + the arguments `direct` and `uplo`. `Direct` chooses the diagonal which shall + be considered and `uplo` tells us whether we use the upper or lower part of + the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +void LAPACKE_ctz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float *in, lapack_int ldin, + lapack_complex_float *out, lapack_int ldout ) +{ + lapack_logical colmaj, front, lower, unit; + + if( in == NULL || out == NULL ) return ; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_in_offset = 0; + lapack_int tri_out_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_in_offset = -1; + lapack_int rect_out_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_in_offset = tri_n * ( !colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( colmaj ? ldout : 1 ); + } else if( !lower && n > m ) { + rect_in_offset = tri_n * ( colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( !colmaj ? ldout : 1 ); + } + } else { + if( m > n ) { + tri_in_offset = rect_m * ( !colmaj ? ldin : 1 ); + tri_out_offset = rect_m * ( colmaj ? ldout : 1 ); + if( !lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } else if( n > m ) { + tri_in_offset = rect_n * ( colmaj ? ldin : 1 ); + tri_out_offset = rect_n * ( !colmaj ? ldout : 1 ); + if( lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } + } + + /* Copy & transpose rectangular part */ + if( rect_in_offset >= 0 && rect_out_offset >= 0 ) { + LAPACKE_cge_trans( matrix_layout, rect_m, rect_n, + &in[rect_in_offset], ldin, + &out[rect_out_offset], ldout ); + } + + /* Copy & transpose triangular part */ + LAPACKE_ctr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); +} diff --git a/LAPACKE/utils/lapacke_dtz_nancheck.c b/LAPACKE/utils/lapacke_dtz_nancheck.c new file mode 100644 index 0000000000..cd2ae6731a --- /dev/null +++ b/LAPACKE/utils/lapacke_dtz_nancheck.c @@ -0,0 +1,143 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Check a trapezoidal matrix for NaN entries. The shape of the trapezoidal + matrix is determined by the arguments `direct` and `uplo`. `Direct` chooses + the diagonal which shall be considered and `uplo` tells us whether we use the + upper or lower part of the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const double *a, lapack_int lda ) +{ + lapack_logical colmaj, front, lower, unit; + + if( a == NULL ) return (lapack_logical) 0; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return (lapack_logical) 0; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_offset = tri_n * ( !colmaj ? lda : 1 ); + } else if( !lower && n > m ) { + rect_offset = tri_n * ( colmaj ? lda : 1 ); + } + } else { + if( m > n ) { + tri_offset = rect_m * ( !colmaj ? lda : 1 ); + if( !lower ) { + rect_offset = 0; + } + } else if( n > m ) { + tri_offset = rect_n * ( colmaj ? lda : 1 ); + if( lower ) { + rect_offset = 0; + } + } + } + + /* Check rectangular part */ + if( rect_offset >= 0 ) { + if( LAPACKE_dge_nancheck( matrix_layout, rect_m, rect_n, + &a[rect_offset], lda ) ) { + return (lapack_logical) 1; + } + } + + /* Check triangular part */ + return LAPACKE_dtr_nancheck( matrix_layout, uplo, diag, tri_n, + &a[tri_offset], lda ); +} diff --git a/LAPACKE/utils/lapacke_dtz_trans.c b/LAPACKE/utils/lapacke_dtz_trans.c new file mode 100644 index 0000000000..f53e03adcf --- /dev/null +++ b/LAPACKE/utils/lapacke_dtz_trans.c @@ -0,0 +1,153 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Converts input triangular matrix from row-major(C) to column-major(Fortran) + layout or vice versa. The shape of the trapezoidal matrix is determined by + the arguments `direct` and `uplo`. `Direct` chooses the diagonal which shall + be considered and `uplo` tells us whether we use the upper or lower part of + the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +void LAPACKE_dtz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const double *in, lapack_int ldin, + double *out, lapack_int ldout ) +{ + lapack_logical colmaj, front, lower, unit; + + if( in == NULL || out == NULL ) return ; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_in_offset = 0; + lapack_int tri_out_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_in_offset = -1; + lapack_int rect_out_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_in_offset = tri_n * ( !colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( colmaj ? ldout : 1 ); + } else if( !lower && n > m ) { + rect_in_offset = tri_n * ( colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( !colmaj ? ldout : 1 ); + } + } else { + if( m > n ) { + tri_in_offset = rect_m * ( !colmaj ? ldin : 1 ); + tri_out_offset = rect_m * ( colmaj ? ldout : 1 ); + if( !lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } else if( n > m ) { + tri_in_offset = rect_n * ( colmaj ? ldin : 1 ); + tri_out_offset = rect_n * ( !colmaj ? ldout : 1 ); + if( lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } + } + + /* Copy & transpose rectangular part */ + if( rect_in_offset >= 0 && rect_out_offset >= 0 ) { + LAPACKE_dge_trans( matrix_layout, rect_m, rect_n, + &in[rect_in_offset], ldin, + &out[rect_out_offset], ldout ); + } + + /* Copy & transpose triangular part */ + LAPACKE_dtr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); +} diff --git a/LAPACKE/utils/lapacke_stz_nancheck.c b/LAPACKE/utils/lapacke_stz_nancheck.c new file mode 100644 index 0000000000..7d7c30f96c --- /dev/null +++ b/LAPACKE/utils/lapacke_stz_nancheck.c @@ -0,0 +1,143 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Check a trapezoidal matrix for NaN entries. The shape of the trapezoidal + matrix is determined by the arguments `direct` and `uplo`. `Direct` chooses + the diagonal which shall be considered and `uplo` tells us whether we use the + upper or lower part of the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const float *a, lapack_int lda ) +{ + lapack_logical colmaj, front, lower, unit; + + if( a == NULL ) return (lapack_logical) 0; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return (lapack_logical) 0; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_offset = tri_n * ( !colmaj ? lda : 1 ); + } else if( !lower && n > m ) { + rect_offset = tri_n * ( colmaj ? lda : 1 ); + } + } else { + if( m > n ) { + tri_offset = rect_m * ( !colmaj ? lda : 1 ); + if( !lower ) { + rect_offset = 0; + } + } else if( n > m ) { + tri_offset = rect_n * ( colmaj ? lda : 1 ); + if( lower ) { + rect_offset = 0; + } + } + } + + /* Check rectangular part */ + if( rect_offset >= 0 ) { + if( LAPACKE_sge_nancheck( matrix_layout, rect_m, rect_n, + &a[rect_offset], lda) ) { + return (lapack_logical) 1; + } + } + + /* Check triangular part */ + return LAPACKE_str_nancheck( matrix_layout, uplo, diag, tri_n, + &a[tri_offset], lda ); +} diff --git a/LAPACKE/utils/lapacke_stz_trans.c b/LAPACKE/utils/lapacke_stz_trans.c new file mode 100644 index 0000000000..bdb4279572 --- /dev/null +++ b/LAPACKE/utils/lapacke_stz_trans.c @@ -0,0 +1,153 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Converts input triangular matrix from row-major(C) to column-major(Fortran) + layout or vice versa. The shape of the trapezoidal matrix is determined by + the arguments `direct` and `uplo`. `Direct` chooses the diagonal which shall + be considered and `uplo` tells us whether we use the upper or lower part of + the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +void LAPACKE_stz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const float *in, lapack_int ldin, + float *out, lapack_int ldout ) +{ + lapack_logical colmaj, front, lower, unit; + + if( in == NULL || out == NULL ) return ; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_in_offset = 0; + lapack_int tri_out_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_in_offset = -1; + lapack_int rect_out_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_in_offset = tri_n * ( !colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( colmaj ? ldout : 1 ); + } else if( !lower && n > m ) { + rect_in_offset = tri_n * ( colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( !colmaj ? ldout : 1 ); + } + } else { + if( m > n ) { + tri_in_offset = rect_m * ( !colmaj ? ldin : 1 ); + tri_out_offset = rect_m * ( colmaj ? ldout : 1 ); + if( !lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } else if( n > m ) { + tri_in_offset = rect_n * ( colmaj ? ldin : 1 ); + tri_out_offset = rect_n * ( !colmaj ? ldout : 1 ); + if( lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } + } + + /* Copy & transpose rectangular part */ + if( rect_in_offset >= 0 && rect_out_offset >= 0 ) { + LAPACKE_sge_trans( matrix_layout, rect_m, rect_n, + &in[rect_in_offset], ldin, + &out[rect_out_offset], ldout ); + } + + /* Copy & transpose triangular part */ + LAPACKE_str_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); +} diff --git a/LAPACKE/utils/lapacke_ztz_nancheck.c b/LAPACKE/utils/lapacke_ztz_nancheck.c new file mode 100644 index 0000000000..481fa4c033 --- /dev/null +++ b/LAPACKE/utils/lapacke_ztz_nancheck.c @@ -0,0 +1,144 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Check a trapezoidal matrix for NaN entries. The shape of the trapezoidal + matrix is determined by the arguments `direct` and `uplo`. `Direct` chooses + the diagonal which shall be considered and `uplo` tells us whether we use the + upper or lower part of the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double *a, + lapack_int lda ) +{ + lapack_logical colmaj, front, lower, unit; + + if( a == NULL ) return (lapack_logical) 0; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return (lapack_logical) 0; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_offset = tri_n * ( !colmaj ? lda : 1 ); + } else if( !lower && n > m ) { + rect_offset = tri_n * ( colmaj ? lda : 1 ); + } + } else { + if( m > n ) { + tri_offset = rect_m * ( !colmaj ? lda : 1 ); + if( !lower ) { + rect_offset = 0; + } + } else if( n > m ) { + tri_offset = rect_n * ( colmaj ? lda : 1 ); + if( lower ) { + rect_offset = 0; + } + } + } + + /* Check rectangular part */ + if( rect_offset >= 0 ) { + if( LAPACKE_zge_nancheck( matrix_layout, rect_m, rect_n, + &a[rect_offset], lda) ) { + return (lapack_logical) 1; + } + } + + /* Check triangular part */ + return LAPACKE_ztr_nancheck( matrix_layout, uplo, diag, tri_n, + &a[tri_offset], lda ); +} diff --git a/LAPACKE/utils/lapacke_ztz_trans.c b/LAPACKE/utils/lapacke_ztz_trans.c new file mode 100644 index 0000000000..fa4bb94c5f --- /dev/null +++ b/LAPACKE/utils/lapacke_ztz_trans.c @@ -0,0 +1,153 @@ +/***************************************************************************** + Copyright (c) 2022, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK utility function +* Author: Simon Märtens +*****************************************************************************/ + +#include "lapacke_utils.h" + +/***************************************************************************** + Converts input triangular matrix from row-major(C) to column-major(Fortran) + layout or vice versa. The shape of the trapezoidal matrix is determined by + the arguments `direct` and `uplo`. `Direct` chooses the diagonal which shall + be considered and `uplo` tells us whether we use the upper or lower part of + the matrix with respect to the chosen diagonal. + + Diagonals 'F' (front / forward) and 'B' (back / backward): + + A = ( F ) A = ( F B ) + ( F ) ( F B ) + ( B F ) ( F B ) + ( B ) + ( B ) + + direct = 'F', uplo = 'L': + + A = ( * ) A = ( * ) + ( * * ) ( * * ) + ( * * * ) ( * * * ) + ( * * * ) + ( * * * ) + + direct = 'F', uplo = 'U': + + A = ( * * * ) A = ( * * * * * ) + ( * * ) ( * * * * ) + ( * ) ( * * * ) + ( ) + ( ) + + direct = 'B', uplo = 'L': + + A = ( ) A = ( * * * ) + ( ) ( * * * * ) + ( * ) ( * * * * * ) + ( * * ) + ( * * * ) + + direct = 'B', uplo = 'U': + + A = ( * * * ) A = ( * * * ) + ( * * * ) ( * * ) + ( * * * ) ( * ) + ( * * ) + ( * ) + +*****************************************************************************/ + +void LAPACKE_ztz_trans( int matrix_layout, char direct, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double *in, lapack_int ldin, + lapack_complex_double *out, lapack_int ldout ) +{ + lapack_logical colmaj, front, lower, unit; + + if( in == NULL || out == NULL ) return ; + + colmaj = ( matrix_layout == LAPACK_COL_MAJOR ); + front = LAPACKE_lsame( direct, 'f' ); + lower = LAPACKE_lsame( uplo, 'l' ); + unit = LAPACKE_lsame( diag, 'u' ); + + if( ( !colmaj && ( matrix_layout != LAPACK_ROW_MAJOR ) ) || + ( !front && !LAPACKE_lsame( direct, 'b' ) ) || + ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || + ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { + /* Just exit if any of input parameters are wrong */ + return; + } + + /* Initial offsets and sizes of triangular and rectangular parts */ + lapack_int tri_in_offset = 0; + lapack_int tri_out_offset = 0; + lapack_int tri_n = MIN(m,n); + lapack_int rect_in_offset = -1; + lapack_int rect_out_offset = -1; + lapack_int rect_m = ( m > n ) ? m - n : m; + lapack_int rect_n = ( n > m ) ? n - m : n; + + /* Fix offsets depending on the shape of the matrix */ + if( front ) { + if( lower && m > n ) { + rect_in_offset = tri_n * ( !colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( colmaj ? ldout : 1 ); + } else if( !lower && n > m ) { + rect_in_offset = tri_n * ( colmaj ? ldin : 1 ); + rect_out_offset = tri_n * ( !colmaj ? ldout : 1 ); + } + } else { + if( m > n ) { + tri_in_offset = rect_m * ( !colmaj ? ldin : 1 ); + tri_out_offset = rect_m * ( colmaj ? ldout : 1 ); + if( !lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } else if( n > m ) { + tri_in_offset = rect_n * ( colmaj ? ldin : 1 ); + tri_out_offset = rect_n * ( !colmaj ? ldout : 1 ); + if( lower ) { + rect_in_offset = 0; + rect_out_offset = 0; + } + } + } + + /* Copy & transpose rectangular part */ + if( rect_in_offset >= 0 && rect_out_offset >= 0 ) { + LAPACKE_zge_trans( matrix_layout, rect_m, rect_n, + &in[rect_in_offset], ldin, + &out[rect_out_offset], ldout ); + } + + /* Copy & transpose triangular part */ + LAPACKE_ztr_trans( matrix_layout, uplo, diag, tri_n, + &in[tri_in_offset], ldin, + &out[tri_out_offset], ldout ); +} diff --git a/README.md b/README.md index c255cc2de7..142aa7b720 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ * VERSION 3.9.1 : April 2021 * VERSION 3.10.0 : June 2021 * VERSION 3.10.1 : April 2022 +* VERSION 3.11.0 : November 2022 LAPACK is a library of Fortran subroutines for solving the most commonly occurring problems in numerical linear algebra. diff --git a/SRC/CMakeLists.txt b/SRC/CMakeLists.txt index d5cd5740cb..8d3d2bcb8e 100644 --- a/SRC/CMakeLists.txt +++ b/SRC/CMakeLists.txt @@ -86,7 +86,7 @@ set(SLASRC sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f - sgels.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f + sgels.f sgelst.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f sgeqp3.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesv.f sgesvd.f sgesvdx.f sgesvx.f sgetc2.f sgetf2.f sgetri.f @@ -177,7 +177,7 @@ set(CLASRC cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f - cgels.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f + cgels.f cgelst.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesv.f cgesvd.f cgesvdx.f cgesvj.f cgejsv.f cgsvj0.f cgsvj1.f @@ -286,7 +286,7 @@ set(DLASRC dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f - dgels.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f + dgels.f dgelst.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f dgeqp3.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesv.f dgesvd.f dgesvdx.f dgesvx.f dgetc2.f dgetf2.f dgetrf.f dgetrf2.f dgetri.f @@ -375,7 +375,7 @@ set(ZLASRC zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f - zgels.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f + zgels.f zgelst.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f zgesc2.f zgesdd.f zgesv.f zgesvd.f zgesvdx.f zgesvx.f zgesvj.f zgejsv.f zgsvj0.f zgsvj1.f diff --git a/SRC/Makefile b/SRC/Makefile index 04f68dcfdf..e89ffeeab1 100644 --- a/SRC/Makefile +++ b/SRC/Makefile @@ -118,7 +118,7 @@ SLASRC = \ sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \ sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \ sgehd2.o sgehrd.o sgelq2.o sgelqf.o \ - sgels.o sgelsd.o sgelss.o sgelsy.o sgeql2.o sgeqlf.o \ + sgels.o sgelst.o sgelsd.o sgelss.o sgelsy.o sgeql2.o sgeqlf.o \ sgeqp3.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \ sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvdx.o sgesvx.o \ sgetc2.o sgetf2.o sgetri.o \ @@ -211,7 +211,7 @@ CLASRC = \ cgbtf2.o cgbtrf.o cgbtrs.o cgebak.o cgebal.o cgebd2.o cgebrd.o \ cgecon.o cgeequ.o cgees.o cgeesx.o cgeev.o cgeevx.o \ cgehd2.o cgehrd.o cgelq2.o cgelqf.o \ - cgels.o cgelsd.o cgelss.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \ + cgels.o cgelst.o cgelsd.o cgelss.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \ cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \ cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o cgesvdx.o \ cgesvj.o cgejsv.o cgsvj0.o cgsvj1.o \ @@ -320,7 +320,7 @@ DLASRC = \ dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \ dgehd2.o dgehrd.o dgelq2.o dgelqf.o \ - dgels.o dgelsd.o dgelss.o dgelsy.o dgeql2.o dgeqlf.o \ + dgels.o dgelst.o dgelsd.o dgelss.o dgelsy.o dgeql2.o dgeqlf.o \ dgeqp3.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \ dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvdx.o dgesvx.o \ dgetc2.o dgetf2.o dgetrf.o dgetri.o \ @@ -412,7 +412,7 @@ ZLASRC = \ zgbtf2.o zgbtrf.o zgbtrs.o zgebak.o zgebal.o zgebd2.o zgebrd.o \ zgecon.o zgeequ.o zgees.o zgeesx.o zgeev.o zgeevx.o \ zgehd2.o zgehrd.o zgelq2.o zgelqf.o \ - zgels.o zgelsd.o zgelss.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \ + zgels.o zgelst.o zgelsd.o zgelss.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \ zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \ zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvdx.o \ zgesvj.o zgejsv.o zgsvj0.o zgsvj1.o \ diff --git a/SRC/cgecon.f b/SRC/cgecon.f index 48f409b680..6f426c2ab6 100644 --- a/SRC/cgecon.f +++ b/SRC/cgecon.f @@ -106,6 +106,7 @@ *> INFO is INTEGER *> = 0: successful exit *> < 0: if INFO = -i, the i-th argument had an illegal value +*> =-5: if ANORM is NAN or negative. *> \endverbatim * * Authors: @@ -153,10 +154,10 @@ SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, INTEGER ISAVE( 3 ) * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, SISNAN INTEGER ICAMAX REAL SLAMCH - EXTERNAL LSAME, ICAMAX, SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH, SISNAN * .. * .. External Subroutines .. EXTERNAL CLACN2, CLATRS, CSRSCL, XERBLA @@ -182,7 +183,7 @@ SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO ) THEN + ELSE IF( ANORM.LT.ZERO .OR. SISNAN( ANORM ) ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN diff --git a/SRC/cgejsv.f b/SRC/cgejsv.f index e37b25b6b2..062ac182b1 100644 --- a/SRC/cgejsv.f +++ b/SRC/cgejsv.f @@ -304,7 +304,7 @@ *> -> the minimal requirement is LWORK >= 3*N. *> -> For optimal performance, *> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQF, *> CUNMLQ. In general, the optimal length LWORK is computed as *> LWORK >= max(N+LWORK(CGEQP3), N+LWORK(CGESVJ), *> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). @@ -313,7 +313,7 @@ *> -> the minimal requirement is LWORK >= 3*N. *> -> For optimal performance, *> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQF, *> CUNMLQ. In general, the optimal length LWORK is computed as *> LWORK >= max(N+LWORK(CGEQP3), LWORK(CPOCON), N+LWORK(CGESVJ), *> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). @@ -350,7 +350,7 @@ *> *> \param[out] RWORK *> \verbatim -*> RWORK is REAL array, dimension (MAX(7,LWORK)) +*> RWORK is REAL array, dimension (MAX(7,LRWORK)) *> On exit, *> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) *> such that SCALE*SVA(1:N) are the computed singular values diff --git a/SRC/cgelst.f b/SRC/cgelst.f new file mode 100644 index 0000000000..7d8e44ddf2 --- /dev/null +++ b/SRC/cgelst.f @@ -0,0 +1,533 @@ +*> \brief CGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its conjugate-transpose, using a QR +*> or LQ factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by CGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by CGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> modulus of elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of the modulus of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complexGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, CLANGE +* .. +* .. External Subroutines .. + EXTERNAL CGELQT, CGEQRT, CGEMLQT, CGEMQRT, SLABAD, + $ CLASCL, CLASET, CTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'CGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = REAL( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'CGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL CGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMQRT( 'Left', 'Conjugate transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL CGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMLQT( 'Left', 'Conjugate transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ M, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = REAL( LWOPT ) +* + RETURN +* +* End of CGELST +* + END diff --git a/SRC/chbgv.f b/SRC/chbgv.f index 130594a0f2..3f84ff5d96 100644 --- a/SRC/chbgv.f +++ b/SRC/chbgv.f @@ -235,7 +235,7 @@ SUBROUTINE CHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, INFO = -12 END IF IF( INFO.NE.0 ) THEN - CALL XERBLA( 'CHBGV ', -INFO ) + CALL XERBLA( 'CHBGV', -INFO ) RETURN END IF * diff --git a/SRC/claqr5.f b/SRC/claqr5.f index 0a01cc2265..4e6f43a73d 100644 --- a/SRC/claqr5.f +++ b/SRC/claqr5.f @@ -533,11 +533,13 @@ SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, * . Mth bulge. Exploit fact that first two elements * . of row are actually zero. ==== * - REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) - H( K+3, K ) = -REFSUM - H( K+3, K+1 ) = -REFSUM*CONJG( V( 2, M ) ) - H( K+3, K+2 ) = H( K+3, K+2 ) - - $ REFSUM*CONJG( V( 3, M ) ) + T1 = V( 1, M ) + T2 = T1*CONJG( V( 2, M ) ) + T3 = T1*CONJG( V( 3, M ) ) + REFSUM = V( 3, M )*H( K+3, K+2 ) + H( K+3, K ) = -REFSUM*T1 + H( K+3, K+1 ) = -REFSUM*T2 + H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*T3 * * ==== Calculate reflection to move * . Mth bulge one step. ==== @@ -572,12 +574,13 @@ SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, $ S( 2*M ), VT ) ALPHA = VT( 1 ) CALL CLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) - REFSUM = CONJG( VT( 1 ) )* - $ ( H( K+1, K )+CONJG( VT( 2 ) )* - $ H( K+2, K ) ) + T1 = CONJG( VT( 1 ) ) + T2 = T1*VT( 2 ) + T3 = T1*VT( 3 ) + REFSUM = H( K+1, K )+CONJG( VT( 2 ) )*H( K+2, K ) * - IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+ - $ CABS1( REFSUM*VT( 3 ) ).GT.ULP* + IF( CABS1( H( K+2, K )-REFSUM*T2 )+ + $ CABS1( REFSUM*T3 ).GT.ULP* $ ( CABS1( H( K, K ) )+CABS1( H( K+1, $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN * @@ -595,7 +598,7 @@ SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, * . Replace the old reflector with * . the new one. ==== * - H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+1, K ) = H( K+1, K ) - REFSUM*T1 H( K+2, K ) = ZERO H( K+3, K ) = ZERO V( 1, M ) = VT( 1 ) diff --git a/SRC/claqz0.f b/SRC/claqz0.f index 9cc25c6dcd..6de40e06ca 100644 --- a/SRC/claqz0.f +++ b/SRC/claqz0.f @@ -644,7 +644,7 @@ RECURSIVE SUBROUTINE CLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, NS = MIN( NSHIFTS, ISTOP-ISTART2 ) NS = MIN( NS, N_UNDEFLATED ) - SHIFTPOS = ISTOP-N_DEFLATED-N_UNDEFLATED+1 + SHIFTPOS = ISTOP-N_UNDEFLATED+1 IF ( MOD( LD, 6 ) .EQ. 0 ) THEN * diff --git a/SRC/clarscl2.f b/SRC/clarscl2.f index 26b028dbba..f4e68523b2 100644 --- a/SRC/clarscl2.f +++ b/SRC/clarscl2.f @@ -1,4 +1,4 @@ -*> \brief \b CLARSCL2 performs reciprocal diagonal scaling on a vector. +*> \brief \b CLARSCL2 performs reciprocal diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -34,7 +34,7 @@ *> *> \verbatim *> -*> CLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> CLARSCL2 performs a reciprocal diagonal scaling on a matrix: *> x <-- inv(D) * x *> where the REAL diagonal matrix D is stored as a vector. *> @@ -66,14 +66,14 @@ *> \param[in,out] X *> \verbatim *> X is COMPLEX array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/clartg.f90 b/SRC/clartg.f90 index 13a629a34e..6231f85203 100644 --- a/SRC/clartg.f90 +++ b/SRC/clartg.f90 @@ -30,7 +30,7 @@ !> The mathematical formulas used for C and S are !> !> sgn(x) = { x / |x|, x != 0 -!> { 1, x = 0 +!> { 1, x = 0 !> !> R = sgn(F) * sqrt(|F|**2 + |G|**2) !> @@ -38,19 +38,20 @@ !> !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) !> +!> Special conditions: +!> If G=0, then C=1 and S=0. +!> If F=0, then C=0 and S is chosen so that R is real. +!> !> When F and G are real, the formulas simplify to C = F/R and !> S = G/R, and the returned values of C, S, and R should be -!> identical to those returned by CLARTG. +!> identical to those returned by SLARTG. !> !> The algorithm used to compute these quantities incorporates scaling !> to avoid overflow or underflow in computing the square root of the !> sum of squares. !> -!> This is a faster version of the BLAS1 routine CROTG, except for -!> the following differences: -!> F and G are unchanged on return. -!> If G=0, then C=1 and S=0. -!> If F=0, then C=0 and S is chosen so that R is real. +!> This is the same routine CROTG fom BLAS1, except that +!> F and G are unchanged on return. !> !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. !> \endverbatim @@ -91,22 +92,19 @@ ! Authors: ! ======== ! -!> \author Edward Anderson, Lockheed Martin +!> \author Weslley Pereira, University of Colorado Denver, USA ! -!> \date August 2016 +!> \date December 2021 ! !> \ingroup OTHERauxiliary ! -!> \par Contributors: -! ================== -!> -!> Weslley Pereira, University of Colorado Denver, USA -! !> \par Further Details: ! ===================== !> !> \verbatim !> +!> Based on the algorithm from +!> !> Anderson E. (2017) !> Algorithm 978: Safe Scaling in the Level 1 BLAS !> ACM Trans Math Softw 44:1--28 @@ -117,7 +115,7 @@ subroutine CLARTG( f, g, c, s, r ) use LA_CONSTANTS, & only: wp=>sp, zero=>szero, one=>sone, two=>stwo, czero, & - rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax + safmin=>ssafmin, safmax=>ssafmax ! ! -- LAPACK auxiliary routine -- ! -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -129,7 +127,7 @@ subroutine CLARTG( f, g, c, s, r ) complex(wp) f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax complex(wp) :: fs, gs, t ! .. ! .. Intrinsic Functions .. @@ -141,6 +139,9 @@ subroutine CLARTG( f, g, c, s, r ) ! .. Statement Function definitions .. ABSSQ( t ) = real( t )**2 + aimag( t )**2 ! .. +! .. Constants .. + rtmin = sqrt( safmin ) +! .. ! .. Executable Statements .. ! if( g == czero ) then @@ -149,30 +150,43 @@ subroutine CLARTG( f, g, c, s, r ) r = f else if( f == czero ) then c = zero - g1 = max( abs(real(g)), abs(aimag(g)) ) - if( g1 > rtmin .and. g1 < rtmax ) then + if( real(g) == zero ) then + r = abs(aimag(g)) + s = conjg( g ) / r + elseif( aimag(g) == zero ) then + r = abs(real(g)) + s = conjg( g ) / r + else + g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) + if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) - s = conjg( g ) / d - r = d - else +! The following two lines can be replaced by `d = abs( g )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else ! ! Use scaled algorithm ! - u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) - s = conjg( gs ) / d - r = d*u + u = min( safmax, max( safmin, g1 ) ) + gs = g / u +! The following two lines can be replaced by `d = abs( gs )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if end if else f1 = max( abs(real(f)), abs(aimag(f)) ) g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/4 ) if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then ! @@ -181,32 +195,51 @@ subroutine CLARTG( f, g, c, s, r ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = f / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( g ) * ( f / sqrt( f2*h2 ) ) + else + s = conjg( g ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = f / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = f * ( h2 / d ) + end if + s = conjg( g ) * ( f / d ) end if - p = 1 / d - c = f2*p - s = conjg( g )*( f*p ) - r = f*( h2*p ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 / u < rtmin ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -214,19 +247,43 @@ subroutine CLARTG( f, g, c, s, r ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = fs / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = fs / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = fs * ( h2 / d ) + end if + s = conjg( gs ) * ( fs / d ) end if - p = 1 / d - c = ( f2*p )*w - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u + ! Rescale c and r + c = c * w + r = r * u end if end if return diff --git a/SRC/clascl2.f b/SRC/clascl2.f index 2ae27975c5..882273b5e2 100644 --- a/SRC/clascl2.f +++ b/SRC/clascl2.f @@ -1,4 +1,4 @@ -*> \brief \b CLASCL2 performs diagonal scaling on a vector. +*> \brief \b CLASCL2 performs diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -34,9 +34,9 @@ *> *> \verbatim *> -*> CLASCL2 performs a diagonal scaling on a vector: +*> CLASCL2 performs a diagonal scaling on a matrix: *> x <-- D * x -*> where the diagonal REAL matrix D is stored as a vector. +*> where the diagonal REAL matrix D is stored as a matrix. *> *> Eventually to be replaced by BLAS_cge_diag_scale in the new BLAS *> standard. @@ -66,14 +66,14 @@ *> \param[in,out] X *> \verbatim *> X is COMPLEX array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/dgecon.f b/SRC/dgecon.f index aa10dee9a2..1ad302ae3f 100644 --- a/SRC/dgecon.f +++ b/SRC/dgecon.f @@ -106,6 +106,7 @@ *> INFO is INTEGER *> = 0: successful exit *> < 0: if INFO = -i, the i-th argument had an illegal value +*> =-5: if ANORM is NAN or negative. *> \endverbatim * * Authors: @@ -152,10 +153,10 @@ SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, INTEGER ISAVE( 3 ) * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, DISNAN INTEGER IDAMAX DOUBLE PRECISION DLAMCH - EXTERNAL LSAME, IDAMAX, DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH, DISNAN * .. * .. External Subroutines .. EXTERNAL DLACN2, DLATRS, DRSCL, XERBLA @@ -175,7 +176,7 @@ SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO ) THEN + ELSE IF( ANORM.LT.ZERO .OR. DISNAN( ANORM ) ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN diff --git a/SRC/dgejsv.f b/SRC/dgejsv.f index fc13f4a5fb..83d16c30e1 100644 --- a/SRC/dgejsv.f +++ b/SRC/dgejsv.f @@ -224,7 +224,7 @@ *> *> \param[out] U *> \verbatim -*> U is DOUBLE PRECISION array, dimension ( LDU, N ) +*> U is DOUBLE PRECISION array, dimension ( LDU, N ) or ( LDU, M ) *> If JOBU = 'U', then U contains on exit the M-by-N matrix of *> the left singular vectors. *> If JOBU = 'F', then U contains on exit the M-by-M matrix of @@ -271,7 +271,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> WORK is DOUBLE PRECISION array, dimension (MAX(7,LWORK)) *> On exit, if N > 0 .AND. M > 0 (else not referenced), *> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such *> that SCALE*SVA(1:N) are the computed singular values diff --git a/SRC/dgelst.f b/SRC/dgelst.f new file mode 100644 index 0000000000..ca0e04a9b8 --- /dev/null +++ b/SRC/dgelst.f @@ -0,0 +1,531 @@ +*> \brief DGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its transpose, using a QR or LQ +*> factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by DGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by DGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DGELQT, DGEQRT, DGEMLQT, DGEMQRT, DLABAD, + $ DLASCL, DLASET, DTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'DGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = DBLE( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'DGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = DLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL DGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMQRT( 'Left', 'Transpose', M, NRHS, N, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, WORK( MN*NB+1 ), + $ INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL DGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMLQT( 'Left', 'Transpose', N, NRHS, M, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = DBLE( LWOPT ) +* + RETURN +* +* End of DGELST +* + END diff --git a/SRC/dhgeqz.f b/SRC/dhgeqz.f index b5a2917e32..d6233596c8 100644 --- a/SRC/dhgeqz.f +++ b/SRC/dhgeqz.f @@ -337,9 +337,9 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, - $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, - $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, - $ WR2 + $ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, + $ U12, U12L, U2, ULP, VS, W11, W12, W21, W22, + $ WABS, WI, WR, WR2 * .. * .. Local Arrays .. DOUBLE PRECISION V( 3 ) @@ -1127,25 +1127,27 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, H( J+2, J-1 ) = ZERO END IF * + T2 = TAU*V( 2 ) + T3 = TAU*V( 3 ) DO 230 JC = J, ILASTM - TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* - $ H( J+2, JC ) ) - H( J, JC ) = H( J, JC ) - TEMP - H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) - H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) - TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* - $ T( J+2, JC ) ) - T( J, JC ) = T( J, JC ) - TEMP2 - T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) - T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) + TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* + $ H( J+2, JC ) + H( J, JC ) = H( J, JC ) - TEMP*TAU + H( J+1, JC ) = H( J+1, JC ) - TEMP*T2 + H( J+2, JC ) = H( J+2, JC ) - TEMP*T3 + TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* + $ T( J+2, JC ) + T( J, JC ) = T( J, JC ) - TEMP2*TAU + T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2 + T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3 230 CONTINUE IF( ILQ ) THEN DO 240 JR = 1, N - TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* - $ Q( JR, J+2 ) ) - Q( JR, J ) = Q( JR, J ) - TEMP - Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) - Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) + Q( JR, J ) = Q( JR, J ) - TEMP*TAU + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2 + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3 240 CONTINUE END IF * @@ -1233,27 +1235,29 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, * * Apply transformations from the right. * + T2 = TAU*V(2) + T3 = TAU*V(3) DO 260 JR = IFRSTM, MIN( J+3, ILAST ) - TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* - $ H( JR, J+2 ) ) - H( JR, J ) = H( JR, J ) - TEMP - H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) - H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) + TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* + $ H( JR, J+2 ) + H( JR, J ) = H( JR, J ) - TEMP*TAU + H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2 + H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3 260 CONTINUE DO 270 JR = IFRSTM, J + 2 - TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* - $ T( JR, J+2 ) ) - T( JR, J ) = T( JR, J ) - TEMP - T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) - T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) + TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* + $ T( JR, J+2 ) + T( JR, J ) = T( JR, J ) - TEMP*TAU + T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2 + T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3 270 CONTINUE IF( ILZ ) THEN DO 280 JR = 1, N - TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* - $ Z( JR, J+2 ) ) - Z( JR, J ) = Z( JR, J ) - TEMP - Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) - Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) + Z( JR, J ) = Z( JR, J ) - TEMP*TAU + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2 + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3 280 CONTINUE END IF T( J+1, J ) = ZERO diff --git a/SRC/dlaqr5.f b/SRC/dlaqr5.f index 43b4ac72a3..cc94b12223 100644 --- a/SRC/dlaqr5.f +++ b/SRC/dlaqr5.f @@ -558,10 +558,13 @@ SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * . Mth bulge. Exploit fact that first two elements * . of row are actually zero. ==== * - REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) - H( K+3, K ) = -REFSUM - H( K+3, K+1 ) = -REFSUM*V( 2, M ) - H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*V( 3, M ) + T1 = V( 1, M ) + T2 = T1*V( 2, M ) + T3 = T1*V( 3, M ) + REFSUM = V( 3, M )*H( K+3, K+2 ) + H( K+3, K ) = -REFSUM*T1 + H( K+3, K+1 ) = -REFSUM*T2 + H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*T3 * * ==== Calculate reflection to move * . Mth bulge one step. ==== @@ -597,11 +600,13 @@ SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, $ VT ) ALPHA = VT( 1 ) CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) - REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* - $ H( K+2, K ) ) + T1 = VT( 1 ) + T2 = T1*VT( 2 ) + T3 = T1*VT( 3 ) + REFSUM = H( K+1, K ) + VT( 2 )*H( K+2, K ) * - IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ - $ ABS( REFSUM*VT( 3 ) ).GT.ULP* + IF( ABS( H( K+2, K )-REFSUM*T2 )+ + $ ABS( REFSUM*T3 ).GT.ULP* $ ( ABS( H( K, K ) )+ABS( H( K+1, $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN * @@ -619,7 +624,7 @@ SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * . Replace the old reflector with * . the new one. ==== * - H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+1, K ) = H( K+1, K ) - REFSUM*T1 H( K+2, K ) = ZERO H( K+3, K ) = ZERO V( 1, M ) = VT( 1 ) diff --git a/SRC/dlaqz0.f b/SRC/dlaqz0.f index 5b09654068..c4cb95fd32 100644 --- a/SRC/dlaqz0.f +++ b/SRC/dlaqz0.f @@ -678,7 +678,7 @@ RECURSIVE SUBROUTINE DLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, NS = MIN( NSHIFTS, ISTOP-ISTART2 ) NS = MIN( NS, N_UNDEFLATED ) - SHIFTPOS = ISTOP-N_DEFLATED-N_UNDEFLATED+1 + SHIFTPOS = ISTOP-N_UNDEFLATED+1 * * Shuffle shifts to put double shifts in front * This ensures that we don't split up a double shift diff --git a/SRC/dlarra.f b/SRC/dlarra.f index 2fb30cd762..a62a079da0 100644 --- a/SRC/dlarra.f +++ b/SRC/dlarra.f @@ -164,6 +164,7 @@ SUBROUTINE DLARRA( N, D, E, E2, SPLTOL, TNRM, * .. Executable Statements .. * INFO = 0 + NSPLIT = 1 * * Quick return if possible * @@ -172,7 +173,6 @@ SUBROUTINE DLARRA( N, D, E, E2, SPLTOL, TNRM, END IF * * Compute splitting points - NSPLIT = 1 IF(SPLTOL.LT.ZERO) THEN * Criterion based on absolute off-diagonal value TMP1 = ABS(SPLTOL)* TNRM diff --git a/SRC/dlarrc.f b/SRC/dlarrc.f index 55a17626ac..d3fea59666 100644 --- a/SRC/dlarrc.f +++ b/SRC/dlarrc.f @@ -167,6 +167,9 @@ SUBROUTINE DLARRC( JOBT, N, VL, VU, D, E, PIVMIN, * .. Executable Statements .. * INFO = 0 + LCNT = 0 + RCNT = 0 + EIGCNT = 0 * * Quick return if possible * @@ -174,9 +177,6 @@ SUBROUTINE DLARRC( JOBT, N, VL, VU, D, E, PIVMIN, RETURN END IF * - LCNT = 0 - RCNT = 0 - EIGCNT = 0 MATT = LSAME( JOBT, 'T' ) diff --git a/SRC/dlarrd.f b/SRC/dlarrd.f index 08dfd02c30..ea1896adf2 100644 --- a/SRC/dlarrd.f +++ b/SRC/dlarrd.f @@ -381,6 +381,7 @@ SUBROUTINE DLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, * .. Executable Statements .. * INFO = 0 + M = 0 * * Quick return if possible * @@ -424,14 +425,9 @@ SUBROUTINE DLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, END IF * Initialize error flags - INFO = 0 NCNVRG = .FALSE. TOOFEW = .FALSE. -* Quick return if possible - M = 0 - IF( N.EQ.0 ) RETURN - * Simplification: IF( IRANGE.EQ.INDRNG .AND. IL.EQ.1 .AND. IU.EQ.N ) IRANGE = 1 diff --git a/SRC/dlarre.f b/SRC/dlarre.f index afbfe6379c..70f59b8295 100644 --- a/SRC/dlarre.f +++ b/SRC/dlarre.f @@ -367,6 +367,8 @@ SUBROUTINE DLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, * INFO = 0 + NSPLIT = 0 + M = 0 * * Quick return if possible * @@ -384,8 +386,6 @@ SUBROUTINE DLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, IRANGE = INDRNG END IF - M = 0 - * Get machine constants SAFMIN = DLAMCH( 'S' ) EPS = DLAMCH( 'P' ) diff --git a/SRC/dlarscl2.f b/SRC/dlarscl2.f index 2468e2702d..cc4b9aa3c1 100644 --- a/SRC/dlarscl2.f +++ b/SRC/dlarscl2.f @@ -1,4 +1,4 @@ -*> \brief \b DLARSCL2 performs reciprocal diagonal scaling on a vector. +*> \brief \b DLARSCL2 performs reciprocal diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -33,7 +33,7 @@ *> *> \verbatim *> -*> DLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> DLARSCL2 performs a reciprocal diagonal scaling on a matrix: *> x <-- inv(D) * x *> where the diagonal matrix D is stored as a vector. *> @@ -65,14 +65,14 @@ *> \param[in,out] X *> \verbatim *> X is DOUBLE PRECISION array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/dlartg.f90 b/SRC/dlartg.f90 index ef8c6e3865..b7049c32f1 100644 --- a/SRC/dlartg.f90 +++ b/SRC/dlartg.f90 @@ -11,7 +11,7 @@ ! SUBROUTINE DLARTG( F, G, C, S, R ) ! ! .. Scalar Arguments .. -! REAL(wp) C, F, G, R, S +! REAL(wp) C, F, G, R, S ! .. ! !> \par Purpose: @@ -45,8 +45,6 @@ !> floating point operations (saves work in DBDSQR when !> there are zeros on the diagonal). !> -!> If F exceeds G in magnitude, C will be positive. -!> !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. !> \endverbatim ! @@ -112,7 +110,7 @@ subroutine DLARTG( f, g, c, s, r ) use LA_CONSTANTS, & only: wp=>dp, zero=>dzero, half=>dhalf, one=>done, & - rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax + safmin=>dsafmin, safmax=>dsafmax ! ! -- LAPACK auxiliary routine -- ! -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -123,11 +121,15 @@ subroutine DLARTG( f, g, c, s, r ) real(wp) :: c, f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, fs, g1, gs, p, u, uu + real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax ! .. ! .. Intrinsic Functions .. intrinsic :: abs, sign, sqrt ! .. +! .. Constants .. + rtmin = sqrt( safmin ) + rtmax = sqrt( safmax/2 ) +! .. ! .. Executable Statements .. ! f1 = abs( f ) @@ -143,20 +145,18 @@ subroutine DLARTG( f, g, c, s, r ) else if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then d = sqrt( f*f + g*g ) - p = one / d - c = f1*p - s = g*sign( p, f ) + c = f1 / d r = sign( d, f ) + s = g / r else u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - fs = f*uu - gs = g*uu + fs = f / u + gs = g / u d = sqrt( fs*fs + gs*gs ) - p = one / d - c = abs( fs )*p - s = gs*sign( p, f ) - r = sign( d, f )*u + c = abs( fs ) / d + r = sign( d, f ) + s = gs / r + r = r*u end if return end subroutine diff --git a/SRC/dlascl2.f b/SRC/dlascl2.f index 901e43c494..568e296ad0 100644 --- a/SRC/dlascl2.f +++ b/SRC/dlascl2.f @@ -1,4 +1,4 @@ -*> \brief \b DLASCL2 performs diagonal scaling on a vector. +*> \brief \b DLASCL2 performs diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -33,7 +33,7 @@ *> *> \verbatim *> -*> DLASCL2 performs a diagonal scaling on a vector: +*> DLASCL2 performs a diagonal scaling on a matrix: *> x <-- D * x *> where the diagonal matrix D is stored as a vector. *> @@ -65,14 +65,14 @@ *> \param[in,out] X *> \verbatim *> X is DOUBLE PRECISION array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/dsbgv.f b/SRC/dsbgv.f index b49df0063e..9bb07a1fa2 100644 --- a/SRC/dsbgv.f +++ b/SRC/dsbgv.f @@ -228,7 +228,7 @@ SUBROUTINE DSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, INFO = -12 END IF IF( INFO.NE.0 ) THEN - CALL XERBLA( 'DSBGV ', -INFO ) + CALL XERBLA( 'DSBGV', -INFO ) RETURN END IF * diff --git a/SRC/ieeeck.f b/SRC/ieeeck.f index 74065c3b4e..f9f6332ecf 100644 --- a/SRC/ieeeck.f +++ b/SRC/ieeeck.f @@ -41,7 +41,7 @@ *> \param[in] ISPEC *> \verbatim *> ISPEC is INTEGER -*> Specifies whether to test just for inifinity arithmetic +*> Specifies whether to test just for infinity arithmetic *> or whether to test for infinity and NaN arithmetic. *> = 0: Verify infinity arithmetic only. *> = 1: Verify infinity and NaN arithmetic. diff --git a/SRC/iparam2stage.F b/SRC/iparam2stage.F index c153eef22b..c701c2be08 100644 --- a/SRC/iparam2stage.F +++ b/SRC/iparam2stage.F @@ -178,7 +178,8 @@ INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, * .. * .. External Functions .. INTEGER ILAENV - EXTERNAL ILAENV + LOGICAL LSAME + EXTERNAL ILAENV, LSAME * .. * .. Executable Statements .. * @@ -310,7 +311,7 @@ INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, * * Will add the VECT OPTION HERE next release VECT = OPTS(1:1) - IF( VECT.EQ.'N' ) THEN + IF( LSAME( VECT, 'N' ) ) THEN LHOUS = MAX( 1, 4*NI ) ELSE * This is not correct, it need to call the ALGO and the stage2 diff --git a/SRC/sgecon.f b/SRC/sgecon.f index a284b094be..86aeea73bb 100644 --- a/SRC/sgecon.f +++ b/SRC/sgecon.f @@ -106,6 +106,7 @@ *> INFO is INTEGER *> = 0: successful exit *> < 0: if INFO = -i, the i-th argument had an illegal value +*> =-5: if ANORM is NAN or negative. *> \endverbatim * * Authors: @@ -152,10 +153,10 @@ SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, INTEGER ISAVE( 3 ) * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, SISNAN INTEGER ISAMAX REAL SLAMCH - EXTERNAL LSAME, ISAMAX, SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH, SISNAN * .. * .. External Subroutines .. EXTERNAL SLACN2, SLATRS, SRSCL, XERBLA @@ -175,7 +176,7 @@ SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO ) THEN + ELSE IF( ANORM.LT.ZERO .OR. SISNAN( ANORM ) ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN diff --git a/SRC/sgejsv.f b/SRC/sgejsv.f index 82ac6b94b5..923573bdb4 100644 --- a/SRC/sgejsv.f +++ b/SRC/sgejsv.f @@ -224,7 +224,7 @@ *> *> \param[out] U *> \verbatim -*> U is REAL array, dimension ( LDU, N ) +*> U is REAL array, dimension ( LDU, N ) or ( LDU, M ) *> If JOBU = 'U', then U contains on exit the M-by-N matrix of *> the left singular vectors. *> If JOBU = 'F', then U contains on exit the M-by-M matrix of @@ -271,7 +271,7 @@ *> *> \param[out] WORK *> \verbatim -*> WORK is REAL array, dimension (LWORK) +*> WORK is REAL array, dimension (MAX(7,LWORK)) *> On exit, *> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such *> that SCALE*SVA(1:N) are the computed singular values @@ -318,36 +318,36 @@ *> LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement. *> ->> For optimal performance (blocked code) the optimal value *> is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal -*> block size for DGEQP3 and DGEQRF. +*> block size for SGEQP3 and SGEQRF. *> In general, optimal LWORK is computed as -*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7). +*> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SGEQRF), 7). *> -> .. an estimate of the scaled condition number of A is *> required (JOBA='E', 'G'). In this case, LWORK is the maximum *> of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7). *> ->> For optimal performance (blocked code) the optimal value *> is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7). *> In general, the optimal length LWORK is computed as -*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), -*> N+N*N+LWORK(DPOCON),7). +*> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SGEQRF), +*> N+N*N+LWORK(SPOCON),7). *> *> If SIGMA and the right singular vectors are needed (JOBV = 'V'), *> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). *> -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7), -*> where NB is the optimal block size for DGEQP3, DGEQRF, DGELQ, -*> DORMLQ. In general, the optimal length LWORK is computed as -*> LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON), -*> N+LWORK(DGELQ), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)). +*> where NB is the optimal block size for SGEQP3, SGEQRF, SGELQF, +*> SORMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(SGEQP3), N+LWORK(SPOCON), +*> N+LWORK(SGELQF), 2*N+LWORK(SGEQRF), N+LWORK(SORMLQ)). *> *> If SIGMA and the left singular vectors are needed *> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). *> -> For optimal performance: *> if JOBU = 'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7), *> if JOBU = 'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7), -*> where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR. +*> where NB is the optimal block size for SGEQP3, SGEQRF, SORMQR. *> In general, the optimal length LWORK is computed as -*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON), -*> 2*N+LWORK(DGEQRF), N+LWORK(DORMQR)). -*> Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or +*> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SPOCON), +*> 2*N+LWORK(SGEQRF), N+LWORK(SORMQR)). +*> Here LWORK(SORMQR) equals N*NB (for JOBU = 'U') or *> M*NB (for JOBU = 'F'). *> *> If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and @@ -357,12 +357,12 @@ *> LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6). *> -> For optimal performance, LWORK should be additionally *> larger than N+M*NB, where NB is the optimal block size -*> for DORMQR. +*> for SORMQR. *> \endverbatim *> *> \param[out] IWORK *> \verbatim -*> IWORK is INTEGER array, dimension (M+3*N). +*> IWORK is INTEGER array, dimension (MAX(3,M+3*N)). *> On exit, *> IWORK(1) = the numerical rank determined after the initial *> QR factorization with pivoting. See the descriptions diff --git a/SRC/sgelst.f b/SRC/sgelst.f new file mode 100644 index 0000000000..5377bc720a --- /dev/null +++ b/SRC/sgelst.f @@ -0,0 +1,531 @@ +*> \brief SGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its transpose, using a QR or LQ +*> factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by SGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by SGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup realGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SGELQT, SGEQRT, SGEMLQT, SGEMQRT, SLABAD, + $ SLASCL, SLASET, STRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'SGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = REAL( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'SGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = SLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL SGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMQRT( 'Left', 'Transpose', M, NRHS, N, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, WORK( MN*NB+1 ), + $ INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL STRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL STRTRS( 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL SGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMLQT( 'Left', 'Transpose', N, NRHS, M, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = REAL( LWOPT ) +* + RETURN +* +* End of SGELST +* + END diff --git a/SRC/shgeqz.f b/SRC/shgeqz.f index 10fb2b7d76..6543f8cb18 100644 --- a/SRC/shgeqz.f +++ b/SRC/shgeqz.f @@ -337,9 +337,9 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, - $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, - $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, - $ WR2 + $ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, + $ U12, U12L, U2, ULP, VS, W11, W12, W21, W22, + $ WABS, WI, WR, WR2 * .. * .. Local Arrays .. REAL V( 3 ) @@ -1127,25 +1127,27 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, H( J+2, J-1 ) = ZERO END IF * + T2 = TAU * V( 2 ) + T3 = TAU * V( 3 ) DO 230 JC = J, ILASTM - TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* - $ H( J+2, JC ) ) - H( J, JC ) = H( J, JC ) - TEMP - H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) - H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) - TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* - $ T( J+2, JC ) ) - T( J, JC ) = T( J, JC ) - TEMP2 - T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) - T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) + TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* + $ H( J+2, JC ) + H( J, JC ) = H( J, JC ) - TEMP*TAU + H( J+1, JC ) = H( J+1, JC ) - TEMP*T2 + H( J+2, JC ) = H( J+2, JC ) - TEMP*T3 + TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* + $ T( J+2, JC ) + T( J, JC ) = T( J, JC ) - TEMP2*TAU + T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2 + T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3 230 CONTINUE IF( ILQ ) THEN DO 240 JR = 1, N - TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* - $ Q( JR, J+2 ) ) - Q( JR, J ) = Q( JR, J ) - TEMP - Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) - Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) + Q( JR, J ) = Q( JR, J ) - TEMP*TAU + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2 + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3 240 CONTINUE END IF * @@ -1233,27 +1235,29 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, * * Apply transformations from the right. * + T2 = TAU*V( 2 ) + T3 = TAU*V( 3 ) DO 260 JR = IFRSTM, MIN( J+3, ILAST ) - TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* - $ H( JR, J+2 ) ) - H( JR, J ) = H( JR, J ) - TEMP - H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) - H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) + TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* + $ H( JR, J+2 ) + H( JR, J ) = H( JR, J ) - TEMP*TAU + H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2 + H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3 260 CONTINUE DO 270 JR = IFRSTM, J + 2 - TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* - $ T( JR, J+2 ) ) - T( JR, J ) = T( JR, J ) - TEMP - T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) - T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) + TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* + $ T( JR, J+2 ) + T( JR, J ) = T( JR, J ) - TEMP*TAU + T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2 + T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3 270 CONTINUE IF( ILZ ) THEN DO 280 JR = 1, N - TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* - $ Z( JR, J+2 ) ) - Z( JR, J ) = Z( JR, J ) - TEMP - Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) - Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) + Z( JR, J ) = Z( JR, J ) - TEMP*TAU + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2 + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3 280 CONTINUE END IF T( J+1, J ) = ZERO diff --git a/SRC/slaqr5.f b/SRC/slaqr5.f index a4f805674d..b10e597542 100644 --- a/SRC/slaqr5.f +++ b/SRC/slaqr5.f @@ -558,10 +558,13 @@ SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * . Mth bulge. Exploit fact that first two elements * . of row are actually zero. ==== * - REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) - H( K+3, K ) = -REFSUM - H( K+3, K+1 ) = -REFSUM*V( 2, M ) - H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*V( 3, M ) + T1 = V( 1, M ) + T2 = T1*V( 2, M ) + T3 = T1*V( 3, M ) + REFSUM = V( 3, M )*H( K+3, K+2 ) + H( K+3, K ) = -REFSUM*T1 + H( K+3, K+1 ) = -REFSUM*T2 + H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*T3 * * ==== Calculate reflection to move * . Mth bulge one step. ==== @@ -597,11 +600,13 @@ SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, $ VT ) ALPHA = VT( 1 ) CALL SLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) - REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* - $ H( K+2, K ) ) + T1 = VT( 1 ) + T2 = T1*VT( 2 ) + T3 = T2*VT( 3 ) + REFSUM = H( K+1, K )+VT( 2 )*H( K+2, K ) * - IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ - $ ABS( REFSUM*VT( 3 ) ).GT.ULP* + IF( ABS( H( K+2, K )-REFSUM*T2 )+ + $ ABS( REFSUM*T3 ).GT.ULP* $ ( ABS( H( K, K ) )+ABS( H( K+1, $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN * @@ -619,7 +624,7 @@ SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * . Replace the old reflector with * . the new one. ==== * - H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+1, K ) = H( K+1, K ) - REFSUM*T1 H( K+2, K ) = ZERO H( K+3, K ) = ZERO V( 1, M ) = VT( 1 ) diff --git a/SRC/slaqz0.f b/SRC/slaqz0.f index 69f4029148..2e06f9d42c 100644 --- a/SRC/slaqz0.f +++ b/SRC/slaqz0.f @@ -675,7 +675,7 @@ RECURSIVE SUBROUTINE SLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, NS = MIN( NSHIFTS, ISTOP-ISTART2 ) NS = MIN( NS, N_UNDEFLATED ) - SHIFTPOS = ISTOP-N_DEFLATED-N_UNDEFLATED+1 + SHIFTPOS = ISTOP-N_UNDEFLATED+1 * * Shuffle shifts to put double shifts in front * This ensures that we don't split up a double shift diff --git a/SRC/slarra.f b/SRC/slarra.f index be81b0f691..2e27383e13 100644 --- a/SRC/slarra.f +++ b/SRC/slarra.f @@ -164,6 +164,7 @@ SUBROUTINE SLARRA( N, D, E, E2, SPLTOL, TNRM, * .. Executable Statements .. * INFO = 0 + NSPLIT = 1 * * Quick return if possible * @@ -172,7 +173,6 @@ SUBROUTINE SLARRA( N, D, E, E2, SPLTOL, TNRM, END IF * * Compute splitting points - NSPLIT = 1 IF(SPLTOL.LT.ZERO) THEN * Criterion based on absolute off-diagonal value TMP1 = ABS(SPLTOL)* TNRM diff --git a/SRC/slarrc.f b/SRC/slarrc.f index 060051539f..2100d1b3dc 100644 --- a/SRC/slarrc.f +++ b/SRC/slarrc.f @@ -167,6 +167,9 @@ SUBROUTINE SLARRC( JOBT, N, VL, VU, D, E, PIVMIN, * .. Executable Statements .. * INFO = 0 + LCNT = 0 + RCNT = 0 + EIGCNT = 0 * * Quick return if possible * @@ -174,9 +177,6 @@ SUBROUTINE SLARRC( JOBT, N, VL, VU, D, E, PIVMIN, RETURN END IF * - LCNT = 0 - RCNT = 0 - EIGCNT = 0 MATT = LSAME( JOBT, 'T' ) diff --git a/SRC/slarrd.f b/SRC/slarrd.f index 7df8e95fca..21405baa6a 100644 --- a/SRC/slarrd.f +++ b/SRC/slarrd.f @@ -381,6 +381,7 @@ SUBROUTINE SLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, * .. Executable Statements .. * INFO = 0 + M = 0 * * Quick return if possible * @@ -424,14 +425,9 @@ SUBROUTINE SLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, END IF * Initialize error flags - INFO = 0 NCNVRG = .FALSE. TOOFEW = .FALSE. -* Quick return if possible - M = 0 - IF( N.EQ.0 ) RETURN - * Simplification: IF( IRANGE.EQ.INDRNG .AND. IL.EQ.1 .AND. IU.EQ.N ) IRANGE = 1 diff --git a/SRC/slarre.f b/SRC/slarre.f index 34dd71fd9d..2e34ca5a6a 100644 --- a/SRC/slarre.f +++ b/SRC/slarre.f @@ -367,6 +367,8 @@ SUBROUTINE SLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, * INFO = 0 + NSPLIT = 0 + M = 0 * * Quick return if possible * @@ -384,8 +386,6 @@ SUBROUTINE SLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, IRANGE = INDRNG END IF - M = 0 - * Get machine constants SAFMIN = SLAMCH( 'S' ) EPS = SLAMCH( 'P' ) diff --git a/SRC/slarscl2.f b/SRC/slarscl2.f index 5726f12cd5..c7b77c9083 100644 --- a/SRC/slarscl2.f +++ b/SRC/slarscl2.f @@ -1,4 +1,4 @@ -*> \brief \b SLARSCL2 performs reciprocal diagonal scaling on a vector. +*> \brief \b SLARSCL2 performs reciprocal diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -33,7 +33,7 @@ *> *> \verbatim *> -*> SLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> SLARSCL2 performs a reciprocal diagonal scaling on a matrix: *> x <-- inv(D) * x *> where the diagonal matrix D is stored as a vector. *> @@ -65,14 +65,14 @@ *> \param[in,out] X *> \verbatim *> X is REAL array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/slartg.f90 b/SRC/slartg.f90 index a9af1aa8d5..8a5a8f26a3 100644 --- a/SRC/slartg.f90 +++ b/SRC/slartg.f90 @@ -35,7 +35,7 @@ !> square root of the sum of squares. !> !> This version is discontinuous in R at F = 0 but it returns the same -!> C and S as SLARTG for complex inputs (F,0) and (G,0). +!> C and S as CLARTG for complex inputs (F,0) and (G,0). !> !> This is a more accurate version of the BLAS1 routine SROTG, !> with the following other differences: @@ -45,8 +45,6 @@ !> floating point operations (saves work in SBDSQR when !> there are zeros on the diagonal). !> -!> If F exceeds G in magnitude, C will be positive. -!> !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. !> \endverbatim ! @@ -112,7 +110,7 @@ subroutine SLARTG( f, g, c, s, r ) use LA_CONSTANTS, & only: wp=>sp, zero=>szero, half=>shalf, one=>sone, & - rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax + safmin=>ssafmin, safmax=>ssafmax ! ! -- LAPACK auxiliary routine -- ! -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -123,11 +121,15 @@ subroutine SLARTG( f, g, c, s, r ) real(wp) :: c, f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, fs, g1, gs, p, u, uu + real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax ! .. ! .. Intrinsic Functions .. intrinsic :: abs, sign, sqrt ! .. +! .. Constants .. + rtmin = sqrt( safmin ) + rtmax = sqrt( safmax/2 ) +! .. ! .. Executable Statements .. ! f1 = abs( f ) @@ -143,20 +145,18 @@ subroutine SLARTG( f, g, c, s, r ) else if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then d = sqrt( f*f + g*g ) - p = one / d - c = f1*p - s = g*sign( p, f ) + c = f1 / d r = sign( d, f ) + s = g / r else u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - fs = f*uu - gs = g*uu + fs = f / u + gs = g / u d = sqrt( fs*fs + gs*gs ) - p = one / d - c = abs( fs )*p - s = gs*sign( p, f ) - r = sign( d, f )*u + c = abs( fs ) / d + r = sign( d, f ) + s = gs / r + r = r*u end if return end subroutine diff --git a/SRC/slascl2.f b/SRC/slascl2.f index 07b506a8c1..5efc1cfcd2 100644 --- a/SRC/slascl2.f +++ b/SRC/slascl2.f @@ -1,4 +1,4 @@ -*> \brief \b SLASCL2 performs diagonal scaling on a vector. +*> \brief \b SLASCL2 performs diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -33,7 +33,7 @@ *> *> \verbatim *> -*> SLASCL2 performs a diagonal scaling on a vector: +*> SLASCL2 performs a diagonal scaling on a matrix: *> x <-- D * x *> where the diagonal matrix D is stored as a vector. *> @@ -65,14 +65,14 @@ *> \param[in,out] X *> \verbatim *> X is REAL array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/ssbgv.f b/SRC/ssbgv.f index 0a7f73e0a8..8930867658 100644 --- a/SRC/ssbgv.f +++ b/SRC/ssbgv.f @@ -228,7 +228,7 @@ SUBROUTINE SSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, INFO = -12 END IF IF( INFO.NE.0 ) THEN - CALL XERBLA( 'SSBGV ', -INFO ) + CALL XERBLA( 'SSBGV', -INFO ) RETURN END IF * diff --git a/SRC/zgecon.f b/SRC/zgecon.f index 3d3127f9df..9cbfe35bcd 100644 --- a/SRC/zgecon.f +++ b/SRC/zgecon.f @@ -106,6 +106,7 @@ *> INFO is INTEGER *> = 0: successful exit *> < 0: if INFO = -i, the i-th argument had an illegal value +*> =-5: if ANORM is NAN or negative. *> \endverbatim * * Authors: @@ -153,10 +154,10 @@ SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, INTEGER ISAVE( 3 ) * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, DISNAN INTEGER IZAMAX DOUBLE PRECISION DLAMCH - EXTERNAL LSAME, IZAMAX, DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH, DISNAN * .. * .. External Subroutines .. EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATRS @@ -182,7 +183,7 @@ SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -4 - ELSE IF( ANORM.LT.ZERO ) THEN + ELSE IF( ANORM.LT.ZERO .OR. DISNAN( ANORM ) ) THEN INFO = -5 END IF IF( INFO.NE.0 ) THEN diff --git a/SRC/zgejsv.f b/SRC/zgejsv.f index d1106696c0..5134ea8912 100644 --- a/SRC/zgejsv.f +++ b/SRC/zgejsv.f @@ -304,7 +304,7 @@ *> -> the minimal requirement is LWORK >= 3*N. *> -> For optimal performance, *> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQF, *> ZUNMLQ. In general, the optimal length LWORK is computed as *> LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(ZGESVJ), *> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). @@ -313,7 +313,7 @@ *> -> the minimal requirement is LWORK >= 3*N. *> -> For optimal performance, *> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, -*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQF, *> ZUNMLQ. In general, the optimal length LWORK is computed as *> LWORK >= max(N+LWORK(ZGEQP3), LWORK(ZPOCON), N+LWORK(ZGESVJ), *> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). @@ -349,7 +349,7 @@ *> *> \param[out] RWORK *> \verbatim -*> RWORK is DOUBLE PRECISION array, dimension (MAX(7,LWORK)) +*> RWORK is DOUBLE PRECISION array, dimension (MAX(7,LRWORK)) *> On exit, *> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) *> such that SCALE*SVA(1:N) are the computed singular values diff --git a/SRC/zgelst.f b/SRC/zgelst.f new file mode 100644 index 0000000000..4dabdc91e6 --- /dev/null +++ b/SRC/zgelst.f @@ -0,0 +1,533 @@ +*> \brief ZGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its conjugate-transpose, using a QR +*> or LQ factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by ZGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by ZGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> modulus of elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of the modulus of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex16GEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL ZGELQT, ZGEQRT, ZGEMLQT, ZGEMQRT, DLABAD, + $ ZLASCL, ZLASET, ZTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'ZGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = DBLE( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'ZGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL ZGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMQRT( 'Left', 'Conjugate transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL ZGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMLQT( 'Left', 'Conjugate transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ M, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = DBLE( LWOPT ) +* + RETURN +* +* End of ZGELST +* + END diff --git a/SRC/zhbgv.f b/SRC/zhbgv.f index 2d587bd027..297ad6da73 100644 --- a/SRC/zhbgv.f +++ b/SRC/zhbgv.f @@ -235,7 +235,7 @@ SUBROUTINE ZHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, INFO = -12 END IF IF( INFO.NE.0 ) THEN - CALL XERBLA( 'ZHBGV ', -INFO ) + CALL XERBLA( 'ZHBGV', -INFO ) RETURN END IF * diff --git a/SRC/zlaqr5.f b/SRC/zlaqr5.f index 4fa5ee5b06..d8c521349e 100644 --- a/SRC/zlaqr5.f +++ b/SRC/zlaqr5.f @@ -533,11 +533,13 @@ SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, * . Mth bulge. Exploit fact that first two elements * . of row are actually zero. ==== * - REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) - H( K+3, K ) = -REFSUM - H( K+3, K+1 ) = -REFSUM*DCONJG( V( 2, M ) ) - H( K+3, K+2 ) = H( K+3, K+2 ) - - $ REFSUM*DCONJG( V( 3, M ) ) + T1 = V( 1, M ) + T2 = T1*DCONJG( V( 2, M ) ) + T3 = T1*DCONJG( V( 3, M ) ) + REFSUM = V( 3, M )*H( K+3, K+2 ) + H( K+3, K ) = -REFSUM*T1 + H( K+3, K+1 ) = -REFSUM*T2 + H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*T3 * * ==== Calculate reflection to move * . Mth bulge one step. ==== @@ -572,12 +574,13 @@ SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, $ S( 2*M ), VT ) ALPHA = VT( 1 ) CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) - REFSUM = DCONJG( VT( 1 ) )* - $ ( H( K+1, K )+DCONJG( VT( 2 ) )* - $ H( K+2, K ) ) + T1 = DCONJG( VT( 1 ) ) + T2 = T1*VT( 2 ) + T3 = T1*VT( 3 ) + REFSUM = H( K+1, K )+DCONJG( VT( 2 ) )*H( K+2, K ) * - IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+ - $ CABS1( REFSUM*VT( 3 ) ).GT.ULP* + IF( CABS1( H( K+2, K )-REFSUM*T2 )+ + $ CABS1( REFSUM*T3 ).GT.ULP* $ ( CABS1( H( K, K ) )+CABS1( H( K+1, $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN * @@ -595,7 +598,7 @@ SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, * . Replace the old reflector with * . the new one. ==== * - H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+1, K ) = H( K+1, K ) - REFSUM*T1 H( K+2, K ) = ZERO H( K+3, K ) = ZERO V( 1, M ) = VT( 1 ) diff --git a/SRC/zlaqz0.f b/SRC/zlaqz0.f index 0d8884ed5e..3e20200ed4 100644 --- a/SRC/zlaqz0.f +++ b/SRC/zlaqz0.f @@ -646,7 +646,7 @@ RECURSIVE SUBROUTINE ZLAQZ0( WANTS, WANTQ, WANTZ, N, ILO, IHI, A, NS = MIN( NSHIFTS, ISTOP-ISTART2 ) NS = MIN( NS, N_UNDEFLATED ) - SHIFTPOS = ISTOP-N_DEFLATED-N_UNDEFLATED+1 + SHIFTPOS = ISTOP-N_UNDEFLATED+1 IF ( MOD( LD, 6 ) .EQ. 0 ) THEN * diff --git a/SRC/zlarscl2.f b/SRC/zlarscl2.f index 4a1e1603a4..e618659067 100644 --- a/SRC/zlarscl2.f +++ b/SRC/zlarscl2.f @@ -1,4 +1,4 @@ -*> \brief \b ZLARSCL2 performs reciprocal diagonal scaling on a vector. +*> \brief \b ZLARSCL2 performs reciprocal diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -34,7 +34,7 @@ *> *> \verbatim *> -*> ZLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> ZLARSCL2 performs a reciprocal diagonal scaling on a matrix: *> x <-- inv(D) * x *> where the DOUBLE PRECISION diagonal matrix D is stored as a vector. *> @@ -66,14 +66,14 @@ *> \param[in,out] X *> \verbatim *> X is COMPLEX*16 array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/SRC/zlartg.f90 b/SRC/zlartg.f90 index 337a4dda85..a4f9bd4b00 100644 --- a/SRC/zlartg.f90 +++ b/SRC/zlartg.f90 @@ -11,8 +11,8 @@ ! SUBROUTINE ZLARTG( F, G, C, S, R ) ! ! .. Scalar Arguments .. -! REAL(wp) C -! COMPLEX(wp) F, G, R, S +! REAL(wp) C +! COMPLEX(wp) F, G, R, S ! .. ! !> \par Purpose: @@ -30,7 +30,7 @@ !> The mathematical formulas used for C and S are !> !> sgn(x) = { x / |x|, x != 0 -!> { 1, x = 0 +!> { 1, x = 0 !> !> R = sgn(F) * sqrt(|F|**2 + |G|**2) !> @@ -38,6 +38,10 @@ !> !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) !> +!> Special conditions: +!> If G=0, then C=1 and S=0. +!> If F=0, then C=0 and S is chosen so that R is real. +!> !> When F and G are real, the formulas simplify to C = F/R and !> S = G/R, and the returned values of C, S, and R should be !> identical to those returned by DLARTG. @@ -46,11 +50,8 @@ !> to avoid overflow or underflow in computing the square root of the !> sum of squares. !> -!> This is a faster version of the BLAS1 routine ZROTG, except for -!> the following differences: -!> F and G are unchanged on return. -!> If G=0, then C=1 and S=0. -!> If F=0, then C=0 and S is chosen so that R is real. +!> This is the same routine ZROTG fom BLAS1, except that +!> F and G are unchanged on return. !> !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. !> \endverbatim @@ -91,22 +92,19 @@ ! Authors: ! ======== ! -!> \author Edward Anderson, Lockheed Martin +!> \author Weslley Pereira, University of Colorado Denver, USA ! -!> \date August 2016 +!> \date December 2021 ! !> \ingroup OTHERauxiliary ! -!> \par Contributors: -! ================== -!> -!> Weslley Pereira, University of Colorado Denver, USA -! !> \par Further Details: ! ===================== !> !> \verbatim !> +!> Based on the algorithm from +!> !> Anderson E. (2017) !> Algorithm 978: Safe Scaling in the Level 1 BLAS !> ACM Trans Math Softw 44:1--28 @@ -117,7 +115,7 @@ subroutine ZLARTG( f, g, c, s, r ) use LA_CONSTANTS, & only: wp=>dp, zero=>dzero, one=>done, two=>dtwo, czero=>zzero, & - rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax + safmin=>dsafmin, safmax=>dsafmax ! ! -- LAPACK auxiliary routine -- ! -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -129,7 +127,7 @@ subroutine ZLARTG( f, g, c, s, r ) complex(wp) f, g, r, s ! .. ! .. Local Scalars .. - real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w + real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax complex(wp) :: fs, gs, t ! .. ! .. Intrinsic Functions .. @@ -141,6 +139,9 @@ subroutine ZLARTG( f, g, c, s, r ) ! .. Statement Function definitions .. ABSSQ( t ) = real( t )**2 + aimag( t )**2 ! .. +! .. Constants .. + rtmin = sqrt( safmin ) +! .. ! .. Executable Statements .. ! if( g == czero ) then @@ -149,30 +150,43 @@ subroutine ZLARTG( f, g, c, s, r ) r = f else if( f == czero ) then c = zero - g1 = max( abs(real(g)), abs(aimag(g)) ) - if( g1 > rtmin .and. g1 < rtmax ) then + if( real(g) == zero ) then + r = abs(aimag(g)) + s = conjg( g ) / r + elseif( aimag(g) == zero ) then + r = abs(real(g)) + s = conjg( g ) / r + else + g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/2 ) + if( g1 > rtmin .and. g1 < rtmax ) then ! ! Use unscaled algorithm ! - g2 = ABSSQ( g ) - d = sqrt( g2 ) - s = conjg( g ) / d - r = d - else +! The following two lines can be replaced by `d = abs( g )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( g ) + d = sqrt( g2 ) + s = conjg( g ) / d + r = d + else ! ! Use scaled algorithm ! - u = min( safmax, max( safmin, g1 ) ) - uu = one / u - gs = g*uu - g2 = ABSSQ( gs ) - d = sqrt( g2 ) - s = conjg( gs ) / d - r = d*u + u = min( safmax, max( safmin, g1 ) ) + gs = g / u +! The following two lines can be replaced by `d = abs( gs )`. +! This algorithm do not use the intrinsic complex abs. + g2 = ABSSQ( gs ) + d = sqrt( g2 ) + s = conjg( gs ) / d + r = d*u + end if end if else f1 = max( abs(real(f)), abs(aimag(f)) ) g1 = max( abs(real(g)), abs(aimag(g)) ) + rtmax = sqrt( safmax/4 ) if( f1 > rtmin .and. f1 < rtmax .and. & g1 > rtmin .and. g1 < rtmax ) then ! @@ -181,32 +195,51 @@ subroutine ZLARTG( f, g, c, s, r ) f2 = ABSSQ( f ) g2 = ABSSQ( g ) h2 = f2 + g2 - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = f / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( g ) * ( f / sqrt( f2*h2 ) ) + else + s = conjg( g ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = f / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = f * ( h2 / d ) + end if + s = conjg( g ) * ( f / d ) end if - p = 1 / d - c = f2*p - s = conjg( g )*( f*p ) - r = f*( h2*p ) else ! ! Use scaled algorithm ! u = min( safmax, max( safmin, f1, g1 ) ) - uu = one / u - gs = g*uu + gs = g / u g2 = ABSSQ( gs ) - if( f1*uu < rtmin ) then + if( f1 / u < rtmin ) then ! ! f is not well-scaled when scaled by g1. ! Use a different scaling for f. ! v = min( safmax, max( safmin, f1 ) ) - vv = one / v - w = v * uu - fs = f*vv + w = v / u + fs = f / v f2 = ABSSQ( fs ) h2 = f2*w**2 + g2 else @@ -214,19 +247,43 @@ subroutine ZLARTG( f, g, c, s, r ) ! Otherwise use the same scaling for f and g. ! w = one - fs = f*uu + fs = f / u f2 = ABSSQ( fs ) h2 = f2 + g2 end if - if( f2 > rtmin .and. h2 < rtmax ) then - d = sqrt( f2*h2 ) + ! safmin <= f2 <= h2 <= safmax + if( f2 >= h2 * safmin ) then + ! safmin <= f2/h2 <= 1, and h2/f2 is finite + c = sqrt( f2 / h2 ) + r = fs / c + rtmax = rtmax * 2 + if( f2 > rtmin .and. h2 < rtmax ) then + ! safmin <= sqrt( f2*h2 ) <= safmax + s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) + else + s = conjg( gs ) * ( r / h2 ) + end if else - d = sqrt( f2 )*sqrt( h2 ) + ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. + ! Moreover, + ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, + ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). + ! Also, + ! g2 >> f2, which means that h2 = g2. + d = sqrt( f2 * h2 ) + c = f2 / d + if( c >= safmin ) then + r = fs / c + else + ! f2 / sqrt(f2 * h2) < safmin, then + ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax + r = fs * ( h2 / d ) + end if + s = conjg( gs ) * ( fs / d ) end if - p = 1 / d - c = ( f2*p )*w - s = conjg( gs )*( fs*p ) - r = ( fs*( h2*p ) )*u + ! Rescale c and r + c = c * w + r = r * u end if end if return diff --git a/SRC/zlascl2.f b/SRC/zlascl2.f index c4e6992fbe..26406c3636 100644 --- a/SRC/zlascl2.f +++ b/SRC/zlascl2.f @@ -1,4 +1,4 @@ -*> \brief \b ZLASCL2 performs diagonal scaling on a vector. +*> \brief \b ZLASCL2 performs diagonal scaling on a matrix. * * =========== DOCUMENTATION =========== * @@ -34,7 +34,7 @@ *> *> \verbatim *> -*> ZLASCL2 performs a diagonal scaling on a vector: +*> ZLASCL2 performs a diagonal scaling on a matrix: *> x <-- D * x *> where the DOUBLE PRECISION diagonal matrix D is stored as a vector. *> @@ -66,14 +66,14 @@ *> \param[in,out] X *> \verbatim *> X is COMPLEX*16 array, dimension (LDX,N) -*> On entry, the vector X to be scaled by D. -*> On exit, the scaled vector. +*> On entry, the matrix X to be scaled by D. +*> On exit, the scaled matrix. *> \endverbatim *> *> \param[in] LDX *> \verbatim *> LDX is INTEGER -*> The leading dimension of the vector X. LDX >= M. +*> The leading dimension of the matrix X. LDX >= M. *> \endverbatim * * Authors: diff --git a/TESTING/EIG/cchkhs.f b/TESTING/EIG/cchkhs.f index 65f1fc82d4..6c6430d5f8 100644 --- a/TESTING/EIG/cchkhs.f +++ b/TESTING/EIG/cchkhs.f @@ -21,7 +21,7 @@ * .. Array Arguments .. * LOGICAL DOTYPE( * ), SELECT( * ) * INTEGER ISEED( 4 ), IWORK( * ), NN( * ) -* REAL RESULT( 14 ), RWORK( * ) +* REAL RESULT( 16 ), RWORK( * ) * COMPLEX A( LDA, * ), EVECTL( LDU, * ), * $ EVECTR( LDU, * ), EVECTX( LDU, * ), * $ EVECTY( LDU, * ), H( LDA, * ), T1( LDA, * ), @@ -64,10 +64,15 @@ *> eigenvectors of H. Y is lower triangular, and X is *> upper triangular. *> +*> CTREVC3 computes left and right eigenvector matrices +*> from a Schur matrix T and backtransforms them with Z +*> to eigenvector matrices L and R for A. L and R are +*> GE matrices. +*> *> When CCHKHS is called, a number of matrix "sizes" ("n's") and a *> number of matrix "types" are specified. For each size ("n") *> and each type of matrix, one matrix will be generated and used -*> to test the nonsymmetric eigenroutines. For each matrix, 14 +*> to test the nonsymmetric eigenroutines. For each matrix, 16 *> tests will be performed: *> *> (1) | A - U H U**H | / ( |A| n ulp ) @@ -98,6 +103,10 @@ *> *> (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) *> +*> (15) | AR - RW | / ( |A| |R| ulp ) +*> +*> (16) | LA - WL | / ( |A| |L| ulp ) +*> *> The "sizes" are specified by an array NN(1:NSIZES); the value of *> each element NN(j) specifies one size. *> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); @@ -331,7 +340,7 @@ *> Workspace. Could be equivalenced to IWORK, but not RWORK. *> Modified. *> -*> RESULT - REAL array, dimension (14) +*> RESULT - REAL array, dimension (16) *> The values computed by the fourteen tests described above. *> The values are currently limited to 1/ulp, to avoid *> overflow. @@ -421,7 +430,7 @@ SUBROUTINE CCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * .. Array Arguments .. LOGICAL DOTYPE( * ), SELECT( * ) INTEGER ISEED( 4 ), IWORK( * ), NN( * ) - REAL RESULT( 14 ), RWORK( * ) + REAL RESULT( 16 ), RWORK( * ) COMPLEX A( LDA, * ), EVECTL( LDU, * ), $ EVECTR( LDU, * ), EVECTX( LDU, * ), $ EVECTY( LDU, * ), H( LDA, * ), T1( LDA, * ), @@ -463,8 +472,8 @@ SUBROUTINE CCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * .. External Subroutines .. EXTERNAL CCOPY, CGEHRD, CGEMM, CGET10, CGET22, CHSEIN, $ CHSEQR, CHST01, CLACPY, CLASET, CLATME, CLATMR, - $ CLATMS, CTREVC, CUNGHR, CUNMHR, SLABAD, SLAFTS, - $ SLASUM, XERBLA + $ CLATMS, CTREVC, CTREVC3, CUNGHR, CUNMHR, + $ SLABAD, SLAFTS, SLASUM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, REAL, SQRT @@ -1067,6 +1076,66 @@ SUBROUTINE CCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, $ RESULT( 14 ) = DUMMA( 3 )*ANINV END IF * +* Compute Left and Right Eigenvectors of A +* +* Compute a Right eigenvector matrix: +* + NTEST = 15 + RESULT( 15 ) = ULPINV +* + CALL CLACPY( ' ', N, N, UZ, LDU, EVECTR, LDU ) +* + CALL CTREVC3( 'Right', 'Back', SELECT, N, T1, LDA, CDUMMA, + $ LDU, EVECTR, LDU, N, IN, WORK, NWORK, RWORK, + $ N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CTREVC3(R,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 15: | AR - RW | / ( |A| |R| ulp ) +* +* (from Schur decomposition) +* + CALL CGET22( 'N', 'N', 'N', N, A, LDA, EVECTR, LDU, W1, + $ WORK, RWORK, DUMMA( 1 ) ) + RESULT( 15 ) = DUMMA( 1 ) + IF( DUMMA( 2 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Right', 'CTREVC3', + $ DUMMA( 2 ), N, JTYPE, IOLDSD + END IF +* +* Compute a Left eigenvector matrix: +* + NTEST = 16 + RESULT( 16 ) = ULPINV +* + CALL CLACPY( ' ', N, N, UZ, LDU, EVECTL, LDU ) +* + CALL CTREVC3( 'Left', 'Back', SELECT, N, T1, LDA, EVECTL, + $ LDU, CDUMMA, LDU, N, IN, WORK, NWORK, RWORK, + $ N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CTREVC3(L,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 16: | LA - WL | / ( |A| |L| ulp ) +* +* (from Schur decomposition) +* + CALL CGET22( 'Conj', 'N', 'Conj', N, A, LDA, EVECTL, LDU, + $ W1, WORK, RWORK, DUMMA( 3 ) ) + RESULT( 16 ) = DUMMA( 3 ) + IF( DUMMA( 4 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Left', 'CTREVC3', DUMMA( 4 ), + $ N, JTYPE, IOLDSD + END IF +* * End of Loop -- Check for RESULT(j) > THRESH * 240 CONTINUE diff --git a/TESTING/EIG/chet21.f b/TESTING/EIG/chet21.f index a274681c98..1bd35cb41c 100644 --- a/TESTING/EIG/chet21.f +++ b/TESTING/EIG/chet21.f @@ -304,9 +304,9 @@ SUBROUTINE CHET21( ITYPE, UPLO, N, KBAND, A, LDA, D, E, U, LDU, V, 10 CONTINUE * IF( N.GT.1 .AND. KBAND.EQ.1 ) THEN - DO 20 J = 2, N - 1 + DO 20 J = 1, N - 1 CALL CHER2( CUPLO, N, -CMPLX( E( J ) ), U( 1, J ), 1, - $ U( 1, J-1 ), 1, WORK, N ) + $ U( 1, J+1 ), 1, WORK, N ) 20 CONTINUE END IF WNORM = CLANHE( '1', CUPLO, N, WORK, N, RWORK ) diff --git a/TESTING/EIG/csyl01.f b/TESTING/EIG/csyl01.f index e21f1a7a03..82d790daa5 100644 --- a/TESTING/EIG/csyl01.f +++ b/TESTING/EIG/csyl01.f @@ -124,7 +124,7 @@ SUBROUTINE CSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) $ C( MAXM, MAXN ), CC( MAXM, MAXN ), $ X( MAXM, MAXN ), $ DUML( MAXM ), DUMR( MAXN ), - $ D( MIN( MAXM, MAXN ) ) + $ D( MAX( MAXM, MAXN ) ) REAL SWORK( LDSWORK, 54 ), DUM( MAXN ), VM( 2 ) INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ) * .. diff --git a/TESTING/EIG/dchkhs.f b/TESTING/EIG/dchkhs.f index 2e57498965..79ba960086 100644 --- a/TESTING/EIG/dchkhs.f +++ b/TESTING/EIG/dchkhs.f @@ -23,7 +23,7 @@ * INTEGER ISEED( 4 ), IWORK( * ), NN( * ) * DOUBLE PRECISION A( LDA, * ), EVECTL( LDU, * ), * $ EVECTR( LDU, * ), EVECTX( LDU, * ), -* $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 14 ), +* $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 16 ), * $ T1( LDA, * ), T2( LDA, * ), TAU( * ), * $ U( LDU, * ), UU( LDU, * ), UZ( LDU, * ), * $ WI1( * ), WI2( * ), WI3( * ), WORK( * ), @@ -49,15 +49,21 @@ *> T is "quasi-triangular", and the eigenvalue vector W. *> *> DTREVC computes the left and right eigenvector matrices -*> L and R for T. +*> L and R for T. L is lower quasi-triangular, and R is +*> upper quasi-triangular. *> *> DHSEIN computes the left and right eigenvector matrices *> Y and X for H, using inverse iteration. *> +*> DTREVC3 computes left and right eigenvector matrices +*> from a Schur matrix T and backtransforms them with Z +*> to eigenvector matrices L and R for A. L and R are +*> GE matrices. +*> *> When DCHKHS is called, a number of matrix "sizes" ("n's") and a *> number of matrix "types" are specified. For each size ("n") *> and each type of matrix, one matrix will be generated and used -*> to test the nonsymmetric eigenroutines. For each matrix, 14 +*> to test the nonsymmetric eigenroutines. For each matrix, 16 *> tests will be performed: *> *> (1) | A - U H U**T | / ( |A| n ulp ) @@ -88,6 +94,10 @@ *> *> (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) *> +*> (15) | AR - RW | / ( |A| |R| ulp ) +*> +*> (16) | LA - WL | / ( |A| |L| ulp ) +*> *> The "sizes" are specified by an array NN(1:NSIZES); the value of *> each element NN(j) specifies one size. *> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); @@ -331,7 +341,7 @@ *> Workspace. *> Modified. *> -*> RESULT - DOUBLE PRECISION array, dimension (14) +*> RESULT - DOUBLE PRECISION array, dimension (16) *> The values computed by the fourteen tests described above. *> The values are currently limited to 1/ulp, to avoid *> overflow. @@ -423,7 +433,7 @@ SUBROUTINE DCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, INTEGER ISEED( 4 ), IWORK( * ), NN( * ) DOUBLE PRECISION A( LDA, * ), EVECTL( LDU, * ), $ EVECTR( LDU, * ), EVECTX( LDU, * ), - $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 14 ), + $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 16 ), $ T1( LDA, * ), T2( LDA, * ), TAU( * ), $ U( LDU, * ), UU( LDU, * ), UZ( LDU, * ), $ WI1( * ), WI2( * ), WI3( * ), WORK( * ), @@ -461,7 +471,7 @@ SUBROUTINE DCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, EXTERNAL DCOPY, DGEHRD, DGEMM, DGET10, DGET22, DHSEIN, $ DHSEQR, DHST01, DLABAD, DLACPY, DLAFTS, DLASET, $ DLASUM, DLATME, DLATMR, DLATMS, DORGHR, DORMHR, - $ DTREVC, XERBLA + $ DTREVC, DTREVC3, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, SQRT @@ -561,7 +571,7 @@ SUBROUTINE DCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * Initialize RESULT * - DO 30 J = 1, 14 + DO 30 J = 1, 16 RESULT( J ) = ZERO 30 CONTINUE * @@ -1108,6 +1118,64 @@ SUBROUTINE DCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, $ RESULT( 14 ) = DUMMA( 3 )*ANINV END IF * +* Compute Left and Right Eigenvectors of A +* +* Compute a Right eigenvector matrix: +* + NTEST = 15 + RESULT( 15 ) = ULPINV +* + CALL DLACPY( ' ', N, N, UZ, LDU, EVECTR, LDU ) +* + CALL DTREVC3( 'Right', 'Back', SELECT, N, T1, LDA, DUMMA, + $ LDU, EVECTR, LDU, N, IN, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DTREVC3(R,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 15: | AR - RW | / ( |A| |R| ulp ) +* +* (from Schur decomposition) +* + CALL DGET22( 'N', 'N', 'N', N, A, LDA, EVECTR, LDU, WR1, + $ WI1, WORK, DUMMA( 1 ) ) + RESULT( 15 ) = DUMMA( 1 ) + IF( DUMMA( 2 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Right', 'DTREVC3', + $ DUMMA( 2 ), N, JTYPE, IOLDSD + END IF +* +* Compute a Left eigenvector matrix: +* + NTEST = 16 + RESULT( 16 ) = ULPINV +* + CALL DLACPY( ' ', N, N, UZ, LDU, EVECTL, LDU ) +* + CALL DTREVC3( 'Left', 'Back', SELECT, N, T1, LDA, EVECTL, + $ LDU, DUMMA, LDU, N, IN, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DTREVC3(L,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 16: | LA - WL | / ( |A| |L| ulp ) +* +* (from Schur decomposition) +* + CALL DGET22( 'Trans', 'N', 'Conj', N, A, LDA, EVECTL, LDU, + $ WR1, WI1, WORK, DUMMA( 3 ) ) + RESULT( 16 ) = DUMMA( 3 ) + IF( DUMMA( 4 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Left', 'DTREVC3', DUMMA( 4 ), + $ N, JTYPE, IOLDSD + END IF +* * End of Loop -- Check for RESULT(j) > THRESH * 250 CONTINUE diff --git a/TESTING/EIG/derred.f b/TESTING/EIG/derred.f index 6df5178253..11a9320526 100644 --- a/TESTING/EIG/derred.f +++ b/TESTING/EIG/derred.f @@ -99,7 +99,7 @@ SUBROUTINE DERRED( PATH, NUNIT ) * .. * .. External Subroutines .. EXTERNAL CHKXER, DGEES, DGEESX, DGEEV, DGEEVX, DGEJSV, - $ DGESDD, DGESVD, DGESVDX, DGESVQ + $ DGESDD, DGESVD, DGESVDX, DGESVDQ * .. * .. External Functions .. LOGICAL DSLECT, LSAMEN diff --git a/TESTING/EIG/schkhs.f b/TESTING/EIG/schkhs.f index ab0e901383..bf8eb1b409 100644 --- a/TESTING/EIG/schkhs.f +++ b/TESTING/EIG/schkhs.f @@ -23,7 +23,7 @@ * INTEGER ISEED( 4 ), IWORK( * ), NN( * ) * REAL A( LDA, * ), EVECTL( LDU, * ), * $ EVECTR( LDU, * ), EVECTX( LDU, * ), -* $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 14 ), +* $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 16 ), * $ T1( LDA, * ), T2( LDA, * ), TAU( * ), * $ U( LDU, * ), UU( LDU, * ), UZ( LDU, * ), * $ WI1( * ), WI2( * ), WI3( * ), WORK( * ), @@ -54,10 +54,15 @@ *> SHSEIN computes the left and right eigenvector matrices *> Y and X for H, using inverse iteration. *> +*> STREVC3 computes left and right eigenvector matrices +*> from a Schur matrix T and backtransforms them with Z +*> to eigenvector matrices L and R for A. L and R are +*> GE matrices. +*> *> When SCHKHS is called, a number of matrix "sizes" ("n's") and a *> number of matrix "types" are specified. For each size ("n") *> and each type of matrix, one matrix will be generated and used -*> to test the nonsymmetric eigenroutines. For each matrix, 14 +*> to test the nonsymmetric eigenroutines. For each matrix, 16 *> tests will be performed: *> *> (1) | A - U H U**T | / ( |A| n ulp ) @@ -88,6 +93,10 @@ *> *> (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) *> +*> (15) | AR - RW | / ( |A| |R| ulp ) +*> +*> (16) | LA - WL | / ( |A| |L| ulp ) +*> *> The "sizes" are specified by an array NN(1:NSIZES); the value of *> each element NN(j) specifies one size. *> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); @@ -331,7 +340,7 @@ *> Workspace. *> Modified. *> -*> RESULT - REAL array, dimension (14) +*> RESULT - REAL array, dimension (16) *> The values computed by the fourteen tests described above. *> The values are currently limited to 1/ulp, to avoid *> overflow. @@ -423,7 +432,7 @@ SUBROUTINE SCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, INTEGER ISEED( 4 ), IWORK( * ), NN( * ) REAL A( LDA, * ), EVECTL( LDU, * ), $ EVECTR( LDU, * ), EVECTX( LDU, * ), - $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 14 ), + $ EVECTY( LDU, * ), H( LDA, * ), RESULT( 16 ), $ T1( LDA, * ), T2( LDA, * ), TAU( * ), $ U( LDU, * ), UU( LDU, * ), UZ( LDU, * ), $ WI1( * ), WI2( * ), WI3( * ), WORK( * ), @@ -461,7 +470,7 @@ SUBROUTINE SCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, EXTERNAL SCOPY, SGEHRD, SGEMM, SGET10, SGET22, SHSEIN, $ SHSEQR, SHST01, SLABAD, SLACPY, SLAFTS, SLASET, $ SLASUM, SLATME, SLATMR, SLATMS, SORGHR, SORMHR, - $ STREVC, XERBLA + $ STREVC, STREVC3, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, REAL, SQRT @@ -561,7 +570,7 @@ SUBROUTINE SCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * Initialize RESULT * - DO 30 J = 1, 14 + DO 30 J = 1, 16 RESULT( J ) = ZERO 30 CONTINUE * @@ -1108,6 +1117,64 @@ SUBROUTINE SCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, $ RESULT( 14 ) = DUMMA( 3 )*ANINV END IF * +* Compute Left and Right Eigenvectors of A +* +* Compute a Right eigenvector matrix: +* + NTEST = 15 + RESULT( 15 ) = ULPINV +* + CALL SLACPY( ' ', N, N, UZ, LDU, EVECTR, LDU ) +* + CALL STREVC3( 'Right', 'Back', SELECT, N, T1, LDA, DUMMA, + $ LDU, EVECTR, LDU, N, IN, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'STREVC3(R,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 15: | AR - RW | / ( |A| |R| ulp ) +* +* (from Schur decomposition) +* + CALL SGET22( 'N', 'N', 'N', N, A, LDA, EVECTR, LDU, WR1, + $ WI1, WORK, DUMMA( 1 ) ) + RESULT( 15 ) = DUMMA( 1 ) + IF( DUMMA( 2 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Right', 'STREVC3', + $ DUMMA( 2 ), N, JTYPE, IOLDSD + END IF +* +* Compute a Left eigenvector matrix: +* + NTEST = 16 + RESULT( 16 ) = ULPINV +* + CALL SLACPY( ' ', N, N, UZ, LDU, EVECTL, LDU ) +* + CALL STREVC3( 'Left', 'Back', SELECT, N, T1, LDA, EVECTL, + $ LDU, DUMMA, LDU, N, IN, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'STREVC3(L,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 16: | LA - WL | / ( |A| |L| ulp ) +* +* (from Schur decomposition) +* + CALL SGET22( 'Trans', 'N', 'Conj', N, A, LDA, EVECTL, LDU, + $ WR1, WI1, WORK, DUMMA( 3 ) ) + RESULT( 16 ) = DUMMA( 3 ) + IF( DUMMA( 4 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Left', 'STREVC3', DUMMA( 4 ), + $ N, JTYPE, IOLDSD + END IF +* * End of Loop -- Check for RESULT(j) > THRESH * 250 CONTINUE diff --git a/TESTING/EIG/zchkhs.f b/TESTING/EIG/zchkhs.f index 52962a0414..f5ae9b7f3c 100644 --- a/TESTING/EIG/zchkhs.f +++ b/TESTING/EIG/zchkhs.f @@ -21,7 +21,7 @@ * .. Array Arguments .. * LOGICAL DOTYPE( * ), SELECT( * ) * INTEGER ISEED( 4 ), IWORK( * ), NN( * ) -* DOUBLE PRECISION RESULT( 14 ), RWORK( * ) +* DOUBLE PRECISION RESULT( 16 ), RWORK( * ) * COMPLEX*16 A( LDA, * ), EVECTL( LDU, * ), * $ EVECTR( LDU, * ), EVECTX( LDU, * ), * $ EVECTY( LDU, * ), H( LDA, * ), T1( LDA, * ), @@ -64,10 +64,15 @@ *> eigenvectors of H. Y is lower triangular, and X is *> upper triangular. *> +*> ZTREVC3 computes left and right eigenvector matrices +*> from a Schur matrix T and backtransforms them with Z +*> to eigenvector matrices L and R for A. L and R are +*> GE matrices. +*> *> When ZCHKHS is called, a number of matrix "sizes" ("n's") and a *> number of matrix "types" are specified. For each size ("n") *> and each type of matrix, one matrix will be generated and used -*> to test the nonsymmetric eigenroutines. For each matrix, 14 +*> to test the nonsymmetric eigenroutines. For each matrix, 16 *> tests will be performed: *> *> (1) | A - U H U**H | / ( |A| n ulp ) @@ -98,6 +103,10 @@ *> *> (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) *> +*> (15) | AR - RW | / ( |A| |R| ulp ) +*> +*> (16) | LA - WL | / ( |A| |L| ulp ) +*> *> The "sizes" are specified by an array NN(1:NSIZES); the value of *> each element NN(j) specifies one size. *> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); @@ -331,7 +340,7 @@ *> Workspace. Could be equivalenced to IWORK, but not RWORK. *> Modified. *> -*> RESULT - DOUBLE PRECISION array, dimension (14) +*> RESULT - DOUBLE PRECISION array, dimension (16) *> The values computed by the fourteen tests described above. *> The values are currently limited to 1/ulp, to avoid *> overflow. @@ -421,7 +430,7 @@ SUBROUTINE ZCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * .. Array Arguments .. LOGICAL DOTYPE( * ), SELECT( * ) INTEGER ISEED( 4 ), IWORK( * ), NN( * ) - DOUBLE PRECISION RESULT( 14 ), RWORK( * ) + DOUBLE PRECISION RESULT( 16 ), RWORK( * ) COMPLEX*16 A( LDA, * ), EVECTL( LDU, * ), $ EVECTR( LDU, * ), EVECTX( LDU, * ), $ EVECTY( LDU, * ), H( LDA, * ), T1( LDA, * ), @@ -464,7 +473,7 @@ SUBROUTINE ZCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, EXTERNAL DLABAD, DLAFTS, DLASUM, XERBLA, ZCOPY, ZGEHRD, $ ZGEMM, ZGET10, ZGET22, ZHSEIN, ZHSEQR, ZHST01, $ ZLACPY, ZLASET, ZLATME, ZLATMR, ZLATMS, ZTREVC, - $ ZUNGHR, ZUNMHR + $ ZTREVC3, ZUNGHR, ZUNMHR * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, SQRT @@ -1067,6 +1076,66 @@ SUBROUTINE ZCHKHS( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, $ RESULT( 14 ) = DUMMA( 3 )*ANINV END IF * +* Compute Left and Right Eigenvectors of A +* +* Compute a Right eigenvector matrix: +* + NTEST = 15 + RESULT( 15 ) = ULPINV +* + CALL ZLACPY( ' ', N, N, UZ, LDU, EVECTR, LDU ) +* + CALL ZTREVC3( 'Right', 'Back', SELECT, N, T1, LDA, CDUMMA, + $ LDU, EVECTR, LDU, N, IN, WORK, NWORK, RWORK, + $ N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZTREVC3(R,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 15: | AR - RW | / ( |A| |R| ulp ) +* +* (from Schur decomposition) +* + CALL ZGET22( 'N', 'N', 'N', N, A, LDA, EVECTR, LDU, W1, + $ WORK, RWORK, DUMMA( 1 ) ) + RESULT( 15 ) = DUMMA( 1 ) + IF( DUMMA( 2 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Right', 'ZTREVC3', + $ DUMMA( 2 ), N, JTYPE, IOLDSD + END IF +* +* Compute a Left eigenvector matrix: +* + NTEST = 16 + RESULT( 16 ) = ULPINV +* + CALL ZLACPY( ' ', N, N, UZ, LDU, EVECTL, LDU ) +* + CALL ZTREVC3( 'Left', 'Back', SELECT, N, T1, LDA, EVECTL, + $ LDU, CDUMMA, LDU, N, IN, WORK, NWORK, RWORK, + $ N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZTREVC3(L,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + GO TO 250 + END IF +* +* Test 16: | LA - WL | / ( |A| |L| ulp ) +* +* (from Schur decomposition) +* + CALL ZGET22( 'Conj', 'N', 'Conj', N, A, LDA, EVECTL, LDU, + $ W1, WORK, RWORK, DUMMA( 3 ) ) + RESULT( 16 ) = DUMMA( 3 ) + IF( DUMMA( 4 ).GT.THRESH ) THEN + WRITE( NOUNIT, FMT = 9998 )'Left', 'ZTREVC3', DUMMA( 4 ), + $ N, JTYPE, IOLDSD + END IF +* * End of Loop -- Check for RESULT(j) > THRESH * 240 CONTINUE diff --git a/TESTING/EIG/zerred.f b/TESTING/EIG/zerred.f index d1219c02b9..1876c1f1d7 100644 --- a/TESTING/EIG/zerred.f +++ b/TESTING/EIG/zerred.f @@ -100,7 +100,7 @@ SUBROUTINE ZERRED( PATH, NUNIT ) * .. * .. External Subroutines .. EXTERNAL CHKXER, ZGEES, ZGEESX, ZGEEV, ZGEEVX, ZGESVJ, - $ ZGESDD, ZGESVD, ZGESVDX, ZGESVQ + $ ZGESDD, ZGESVD, ZGESVDX, ZGESVDQ * .. * .. External Functions .. LOGICAL LSAMEN, ZSLECT diff --git a/TESTING/EIG/zhet21.f b/TESTING/EIG/zhet21.f index d254f85e9c..b927a502fa 100644 --- a/TESTING/EIG/zhet21.f +++ b/TESTING/EIG/zhet21.f @@ -304,9 +304,9 @@ SUBROUTINE ZHET21( ITYPE, UPLO, N, KBAND, A, LDA, D, E, U, LDU, V, 10 CONTINUE * IF( N.GT.1 .AND. KBAND.EQ.1 ) THEN - DO 20 J = 2, N - 1 + DO 20 J = 1, N - 1 CALL ZHER2( CUPLO, N, -DCMPLX( E( J ) ), U( 1, J ), 1, - $ U( 1, J-1 ), 1, WORK, N ) + $ U( 1, J+1 ), 1, WORK, N ) 20 CONTINUE END IF WNORM = ZLANHE( '1', CUPLO, N, WORK, N, RWORK ) diff --git a/TESTING/EIG/zsyl01.f b/TESTING/EIG/zsyl01.f index 1e8619a34c..329f39dc4f 100644 --- a/TESTING/EIG/zsyl01.f +++ b/TESTING/EIG/zsyl01.f @@ -124,7 +124,7 @@ SUBROUTINE ZSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) $ C( MAXM, MAXN ), CC( MAXM, MAXN ), $ X( MAXM, MAXN ), $ DUML( MAXM ), DUMR( MAXN ), - $ D( MIN( MAXM, MAXN ) ) + $ D( MAX( MAXM, MAXN ) ) DOUBLE PRECISION SWORK( LDSWORK, 103 ), DUM( MAXN ), VM( 2 ) INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ) * .. diff --git a/TESTING/LIN/alahd.f b/TESTING/LIN/alahd.f index 2cc0fba063..f0423a23b9 100644 --- a/TESTING/LIN/alahd.f +++ b/TESTING/LIN/alahd.f @@ -608,17 +608,18 @@ SUBROUTINE ALAHD( IOUNIT, PATH ) ELSE IF( LSAMEN( 2, P2, 'LS' ) ) THEN * * LS: Least Squares driver routines for -* LS, LSD, LSS, LSX and LSY. +* LS, LST, TSLS, LSD, LSS, LSX and LSY. * WRITE( IOUNIT, FMT = 9984 )PATH WRITE( IOUNIT, FMT = 9967 ) - WRITE( IOUNIT, FMT = 9921 )C1, C1, C1, C1 + WRITE( IOUNIT, FMT = 9921 )C1, C1, C1, C1, C1, C1 WRITE( IOUNIT, FMT = 9935 )1 WRITE( IOUNIT, FMT = 9931 )2 - WRITE( IOUNIT, FMT = 9933 )3 - WRITE( IOUNIT, FMT = 9935 )4 - WRITE( IOUNIT, FMT = 9934 )5 - WRITE( IOUNIT, FMT = 9932 )6 + WRITE( IOUNIT, FMT = 9919 ) + WRITE( IOUNIT, FMT = 9933 )7 + WRITE( IOUNIT, FMT = 9935 )8 + WRITE( IOUNIT, FMT = 9934 )9 + WRITE( IOUNIT, FMT = 9932 )10 WRITE( IOUNIT, FMT = 9920 ) WRITE( IOUNIT, FMT = '( '' Messages:'' )' ) * @@ -1048,10 +1049,11 @@ SUBROUTINE ALAHD( IOUNIT, PATH ) $ 'check if X is in the row space of A or A'' ', $ '(overdetermined case)' ) 9929 FORMAT( ' Test ratios (1-3: ', A1, 'TZRZF):' ) - 9920 FORMAT( 3X, ' 7-10: same as 3-6', 3X, ' 11-14: same as 3-6' ) - 9921 FORMAT( ' Test ratios:', / ' (1-2: ', A1, 'GELS, 3-6: ', A1, - $ 'GELSY, 7-10: ', A1, 'GELSS, 11-14: ', A1, 'GELSD, 15-16: ', - $ A1, 'GETSLS)') + 9919 FORMAT( 3X, ' 3-4: same as 1-2', 3X, ' 5-6: same as 1-2' ) + 9920 FORMAT( 3X, ' 11-14: same as 7-10', 3X, ' 15-18: same as 7-10' ) + 9921 FORMAT( ' Test ratios:', / ' (1-2: ', A1, 'GELS, 3-4: ', A1, + $ 'GELST, 5-6: ', A1, 'GETSLS, 7-10: ', A1, 'GELSY, 11-14: ', + $ A1, 'GETSS, 15-18: ', A1, 'GELSD)' ) 9928 FORMAT( 7X, 'where ALPHA = ( 1 + SQRT( 17 ) ) / 8' ) 9927 FORMAT( 3X, I2, ': ABS( Largest element in L )', / 12X, $ ' - ( 1 / ( 1 - ALPHA ) ) + THRESH' ) diff --git a/TESTING/LIN/cchktr.f b/TESTING/LIN/cchktr.f index c55b076437..4b09361d8b 100644 --- a/TESTING/LIN/cchktr.f +++ b/TESTING/LIN/cchktr.f @@ -541,7 +541,7 @@ SUBROUTINE CCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, * SRNAMT = 'CLATRS3' CALL CCOPY( N, X, 1, B, 1 ) - CALL CCOPY( N, X, 1, B, 1 ) + CALL CCOPY( N, X, 1, B( N+1 ), 1 ) CALL CSCAL( N, BIGNUM, B( N+1 ), 1 ) CALL CLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA, $ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX, @@ -551,7 +551,7 @@ SUBROUTINE CCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, * IF( INFO.NE.0 ) $ CALL ALAERH( PATH, 'CLATRS3', INFO, 0, - $ UPLO // TRANS // DIAG // 'Y', N, N, + $ UPLO // TRANS // DIAG // 'N', N, N, $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, $ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA, @@ -559,7 +559,7 @@ SUBROUTINE CCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, CALL CSSCAL( N, BIGNUM, X, 1 ) CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, $ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA, - $ X, LDA, WORK, RESULT( 10 ) ) + $ X, LDA, WORK, RES ) RESULT( 10 ) = MAX( RESULT( 10 ), RES ) * * Print information about the tests that did not pass diff --git a/TESTING/LIN/cdrvls.f b/TESTING/LIN/cdrvls.f index 7fe189e5fd..ecba705d5f 100644 --- a/TESTING/LIN/cdrvls.f +++ b/TESTING/LIN/cdrvls.f @@ -31,7 +31,8 @@ *> *> \verbatim *> -*> CDRVLS tests the least squares driver routines CGELS, CGETSLS, CGELSS, CGELSY +*> CDRVLS tests the least squares driver routines CGELS, CGELST, +*> CGETSLS, CGELSS, CGELSY *> and CGELSD. *> \endverbatim * @@ -211,7 +212,7 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) REAL ONE, ZERO @@ -228,8 +229,8 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, LRWORK, - $ LWORK_CGELS, LWORK_CGETSLS, LWORK_CGELSS, - $ LWORK_CGELSY, LWORK_CGELSD, + $ LWORK_CGELS, LWORK_CGELST, LWORK_CGETSLS, + $ LWORK_CGELSS, LWORK_CGELSY, LWORK_CGELSD, $ LRWORK_CGELSY, LRWORK_CGELSS, LRWORK_CGELSD REAL EPS, NORMA, NORMB, RCOND * .. @@ -249,7 +250,7 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, CERRLS, CGELS, CGELSD, - $ CGELSS, CGELSY, CGEMM, CGETSLS, CLACPY, + $ CGELSS, CGELST, CGELSY, CGEMM, CGETSLS, CLACPY, $ CLARNV, CQRT13, CQRT15, CQRT16, CSSCAL, $ SAXPY, XLAENV * .. @@ -334,7 +335,8 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -361,6 +363,10 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL CGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_CGELS = INT( WQ( 1 ) ) +* Compute workspace needed for CGELST + CALL CGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_CGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for CGETSLS CALL CGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -425,21 +431,26 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 100 -* +* ===================================================== +* Begin test CGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test CGELS -* * Generate a matrix of scaling type ISCALE * CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -484,15 +495,20 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for CGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL CQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, RWORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for CGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN @@ -515,7 +531,7 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -524,26 +540,34 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test CGETSLS + END DO + END DO + END IF +* ===================================================== +* End test CGELS +* ===================================================== +* ===================================================== +* Begin test CGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) + CALL XLAENV( 3, NXVAL( INB ) ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -560,9 +584,9 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, IF( NCOLS.GT.0 ) THEN CALL CLARNV( 2, ISEED, NCOLS*NRHS, $ WORK ) - CALL CSCAL( NCOLS*NRHS, - $ CONE / REAL( NCOLS ), WORK, - $ 1 ) + CALL CSSCAL( NCOLS*NRHS, + $ ONE / REAL( NCOLS ), WORK, + $ 1 ) END IF CALL CGEMM( TRANS, 'No transpose', NROWS, $ NRHS, NCOLS, CONE, COPYA, LDA, @@ -578,31 +602,37 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'CGETSLS ' - CALL CGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'CGELST' + CALL CGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) +* IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'CGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'CGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for CGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL CQRT16( TRANS, M, N, NRHS, COPYA, - $ LDA, B, LDB, C, LDB, WORK2, - $ RESULT( 15 ) ) + $ LDA, B, LDB, C, LDB, RWORK, + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for CGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * * Solving LS system * - RESULT( 16 ) = CQRT17( TRANS, 1, M, N, + RESULT( 4 ) = CQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -610,7 +640,7 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * Solving overdetermined system * - RESULT( 16 ) = CQRT14( TRANS, M, N, + RESULT( 4 ) = CQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -618,21 +648,151 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 )TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO + END IF +* ===================================================== +* End test CGELST +* ===================================================== +* ===================================================== +* Begin test CGELSTSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO INB = 1, NNB + MB = NBVAL( INB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO IMB = 1, NNB + NB = NBVAL( IMB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'C' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL CLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL CSCAL( NCOLS*NRHS, + $ CONE / REAL( NCOLS ), + $ WORK, 1 ) + END IF + CALL CGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, CONE, + $ COPYA, LDA, WORK, LDWORK, + $ CZERO, B, LDB ) + CALL CLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL CLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL CLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'CGETSLS ' + CALL CGETSLS( TRANS, M, N, NRHS, A, + $ LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'CGETSLS ', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for CGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL CLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL CQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK2, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for CGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = CQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = CQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, B, + $ LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 )TRANS, + $ M, N, NRHS, MB, NB, ITYPE, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO END IF +* ===================================================== +* End test CGELSTSLS +* ==================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -680,37 +840,37 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * workspace used: 2*MNMIN+NB*NB+NB*MAX(N,NRHS) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = CQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = CQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK, RWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 4 ) ) + $ RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = CQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = CQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = CQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = CQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -736,38 +896,38 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 8 ) ) + $ RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = CQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = CQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = CQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = CQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -792,45 +952,45 @@ SUBROUTINE CDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 12 ) ) + $ RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = CQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = CQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = CQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = CQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 80 K = 3, 14 + DO 80 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) diff --git a/TESTING/LIN/cdrvrf3.f b/TESTING/LIN/cdrvrf3.f index 1ca816979a..d0edf75e10 100644 --- a/TESTING/LIN/cdrvrf3.f +++ b/TESTING/LIN/cdrvrf3.f @@ -156,9 +156,10 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, REAL RESULT( NTESTS ) * .. * .. External Functions .. + LOGICAL LSAME REAL SLAMCH, CLANGE COMPLEX CLARND - EXTERNAL SLAMCH, CLARND, CLANGE + EXTERNAL SLAMCH, CLARND, CLANGE, LSAME * .. * .. External Subroutines .. EXTERNAL CTRTTF, CGEQRF, CGEQLF, CTFSM, CTRSM @@ -222,9 +223,9 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO 100 IALPHA = 1, 3 * - IF ( IALPHA.EQ. 1) THEN + IF ( IALPHA.EQ.1 ) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 2) THEN + ELSE IF ( IALPHA.EQ.2 ) THEN ALPHA = ONE ELSE ALPHA = CLARND( 4, ISEED ) @@ -263,7 +264,7 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, NA DO I = 1, NA - A( I, J) = CLARND( 4, ISEED ) + A( I, J ) = CLARND( 4, ISEED ) END DO END DO * @@ -276,6 +277,20 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL CGEQRF( NA, NA, A, LDA, TAU, + C_WORK_CGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO J = 1, NA + DO I = 1, J + A( I, J ) = A( I, J ) / + + ( 2.0 * A( J, J ) ) + END DO + END DO + END IF +* ELSE * * The case IUPLO.EQ.2 is when SIDE.EQ.'L' @@ -285,6 +300,20 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL CGELQF( NA, NA, A, LDA, TAU, + C_WORK_CGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO I = 1, NA + DO J = 1, I + A( I, J ) = A( I, J ) / + + ( 2.0 * A( I, I ) ) + END DO + END DO + END IF +* END IF * * After the QR factorization, the diagonal @@ -293,7 +322,8 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * value 1.0E+00. * DO J = 1, NA - A( J, J) = A(J,J) * CLARND( 5, ISEED ) + A( J, J ) = A( J, J ) * + + CLARND( 5, ISEED ) END DO * * Store a copy of A in RFP format (in ARF). @@ -307,8 +337,8 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = CLARND( 4, ISEED ) - B2( I, J) = B1( I, J) + B1( I, J ) = CLARND( 4, ISEED ) + B2( I, J ) = B1( I, J ) END DO END DO * @@ -331,24 +361,24 @@ SUBROUTINE CDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = B2( I, J ) - B1( I, J ) + B1( I, J ) = B2( I, J ) - B1( I, J ) END DO END DO * - RESULT(1) = CLANGE( 'I', M, N, B1, LDA, + RESULT( 1 ) = CLANGE( 'I', M, N, B1, LDA, + S_WORK_CLANGE ) * - RESULT(1) = RESULT(1) / SQRT( EPS ) - + / MAX ( MAX( M, N), 1 ) + RESULT( 1 ) = RESULT( 1 ) / SQRT( EPS ) + + / MAX ( MAX( M, N ), 1 ) * - IF( RESULT(1).GE.THRESH ) THEN + IF( RESULT( 1 ).GE.THRESH ) THEN IF( NFAIL.EQ.0 ) THEN WRITE( NOUT, * ) WRITE( NOUT, FMT = 9999 ) END IF WRITE( NOUT, FMT = 9997 ) 'CTFSM', + CFORM, SIDE, UPLO, TRANS, DIAG, M, - + N, RESULT(1) + + N, RESULT( 1 ) NFAIL = NFAIL + 1 END IF * diff --git a/TESTING/LIN/cerrls.f b/TESTING/LIN/cerrls.f index 48e44ad863..fca9439181 100644 --- a/TESTING/LIN/cerrls.f +++ b/TESTING/LIN/cerrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> CERRLS tests the error exits for the COMPLEX least squares -*> driver routines (CGELS, CGELSS, CGELSY, CGELSD). +*> driver routines (CGELS, CGELST, CGETSLS, CGELSS, CGELSY, CGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ SUBROUTINE CERRLS( PATH, NUNIT ) EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CGELS, CGELSD, CGELSS, CGELSY, CHKXER + EXTERNAL ALAESM, CHKXER, CGELS, CGELSD, CGELSS, CGELST, + $ CGELSY, CGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ SUBROUTINE CERRLS( PATH, NUNIT ) INFOT = 8 CALL CGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'CGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'CGELS ', INFOT, NOUT, LERR, OK ) * +* CGELST +* + SRNAMT = 'CGELST' + INFOT = 1 + CALL CGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) +* +* CGETSLS +* + SRNAMT = 'CGETSLS' + INFOT = 1 + CALL CGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) +* * CGELSS * SRNAMT = 'CGELSS' diff --git a/TESTING/LIN/ddrvls.f b/TESTING/LIN/ddrvls.f index b64930c10c..b3d07d67f2 100644 --- a/TESTING/LIN/ddrvls.f +++ b/TESTING/LIN/ddrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> DDRVLS tests the least squares driver routines DGELS, DGETSLS, DGELSS, DGELSY, -*> and DGELSD. +*> DDRVLS tests the least squares driver routines DGELS, DGELST, +*> DGETSLS, DGELSS, DGELSY, and DGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) DOUBLE PRECISION ONE, TWO, ZERO @@ -225,8 +225,8 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, - $ LWORK_DGELS, LWORK_DGETSLS, LWORK_DGELSS, - $ LWORK_DGELSY, LWORK_DGELSD + $ LWORK_DGELS, LWORK_DGELST, LWORK_DGETSLS, + $ LWORK_DGELSS, LWORK_DGELSY, LWORK_DGELSD DOUBLE PRECISION EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. @@ -243,12 +243,12 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, DERRLS, DGELS, - $ DGELSD, DGELSS, DGELSY, DGEMM, DLACPY, - $ DLARNV, DLASRT, DQRT13, DQRT15, DQRT16, DSCAL, - $ XLAENV + $ DGELSD, DGELSS, DGELST, DGELSY, DGEMM, + $ DGETSLS, DLACPY, DLARNV, DQRT13, DQRT15, + $ DQRT16, DSCAL, XLAENV * .. * .. Intrinsic Functions .. - INTRINSIC DBLE, INT, LOG, MAX, MIN, SQRT + INTRINSIC DBLE, INT, MAX, MIN, SQRT * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -330,7 +330,8 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -357,6 +358,10 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL DGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_DGELS = INT ( WQ ( 1 ) ) +* Compute workspace needed for DGELST + CALL DGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_DGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for DGETSLS CALL DGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -378,9 +383,9 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Compute LIWORK workspace needed for DGELSY and DGELSD LIWORK = MAX( LIWORK, N, IWQ( 1 ) ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_DGELS, LWORK_DGETSLS, - $ LWORK_DGELSY, LWORK_DGELSS, - $ LWORK_DGELSD ) + LWORK = MAX( LWORK, LWORK_DGELS, LWORK_DGELST, + $ LWORK_DGETSLS, LWORK_DGELSY, + $ LWORK_DGELSS, LWORK_DGELSD ) END IF ENDDO ENDDO @@ -411,21 +416,26 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 110 -* +* ===================================================== +* Begin test DGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test DGELS -* * Generate a matrix of scaling type ISCALE * CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -469,20 +479,27 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for DGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL DQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for DGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * RESULT( 2 ) = DQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, @@ -500,35 +517,42 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9999 )TRANS, M, + WRITE( NOUT, FMT = 9999 ) TRANS, M, $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test DGETSLS + END DO + END DO + END IF +* ===================================================== +* End test DGELS +* ===================================================== +* ===================================================== +* Begin test DGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -563,31 +587,38 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'DGETSLS ' - CALL DGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'DGELST' + CALL DGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'DGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'DGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for DGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL DQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, - $ RESULT( 15 ) ) + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for DGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * - RESULT( 16 ) = DQRT17( TRANS, 1, M, N, + RESULT( 4 ) = DQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -595,7 +626,7 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * Solving overdetermined system * - RESULT( 16 ) = DQRT14( TRANS, M, N, + RESULT( 4 ) = DQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -603,21 +634,151 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 ) TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO + END IF +* ===================================================== +* End test DGELST +* ===================================================== +* ===================================================== +* Begin test DGETSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO IMB = 1, NNB + MB = NBVAL( IMB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'T' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL DLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL DSCAL( NCOLS*NRHS, + $ ONE / DBLE( NCOLS ), + $ WORK, 1 ) + END IF + CALL DGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, ONE, + $ COPYA, LDA, WORK, LDWORK, + $ ZERO, B, LDB ) + CALL DLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL DLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL DLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'DGETSLS' + CALL DGETSLS( TRANS, M, N, NRHS, + $ A, LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'DGETSLS', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for DGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL DLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL DQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for DGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = DQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = DQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, + $ B, LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 ) TRANS, + $ M, N, NRHS, MB, NB, ITYPE, + $ K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO END IF +* ===================================================== +* End test DGETSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -662,37 +823,37 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = DQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = DQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 4 ) ) + $ WORK( M*NRHS+1 ), RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -716,38 +877,38 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 8 ) ) + $ WORK( M*NRHS+1 ), RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -776,45 +937,45 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 12 ) ) + $ WORK( M*NRHS+1 ), RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 90 K = 3, 14 + DO 90 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -826,6 +987,12 @@ SUBROUTINE DDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, NRUN = NRUN + 12 * 100 CONTINUE + + + + + + 110 CONTINUE 120 CONTINUE 130 CONTINUE diff --git a/TESTING/LIN/ddrvrf3.f b/TESTING/LIN/ddrvrf3.f index 1c5d74aea0..ef823c2e75 100644 --- a/TESTING/LIN/ddrvrf3.f +++ b/TESTING/LIN/ddrvrf3.f @@ -153,8 +153,9 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Functions .. + LOGICAL LSAME DOUBLE PRECISION DLAMCH, DLANGE, DLARND - EXTERNAL DLAMCH, DLANGE, DLARND + EXTERNAL DLAMCH, DLANGE, DLARND, LSAME * .. * .. External Subroutines .. EXTERNAL DTRTTF, DGEQRF, DGEQLF, DTFSM, DTRSM @@ -218,9 +219,9 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO 100 IALPHA = 1, 3 * - IF ( IALPHA.EQ. 1) THEN + IF ( IALPHA.EQ.1 ) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 2) THEN + ELSE IF ( IALPHA.EQ.2 ) THEN ALPHA = ONE ELSE ALPHA = DLARND( 2, ISEED ) @@ -259,7 +260,7 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, NA DO I = 1, NA - A( I, J) = DLARND( 2, ISEED ) + A( I, J ) = DLARND( 2, ISEED ) END DO END DO * @@ -272,6 +273,20 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL DGEQRF( NA, NA, A, LDA, TAU, + D_WORK_DGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO J = 1, NA + DO I = 1, J + A( I, J ) = A( I, J ) / + + ( 2.0 * A( J, J ) ) + END DO + END DO + END IF +* ELSE * * The case IUPLO.EQ.2 is when SIDE.EQ.'L' @@ -281,6 +296,20 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL DGELQF( NA, NA, A, LDA, TAU, + D_WORK_DGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO I = 1, NA + DO J = 1, I + A( I, J ) = A( I, J ) / + + ( 2.0 * A( I, I ) ) + END DO + END DO + END IF +* END IF * * Store a copy of A in RFP format (in ARF). @@ -294,8 +323,8 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = DLARND( 2, ISEED ) - B2( I, J) = B1( I, J) + B1( I, J ) = DLARND( 2, ISEED ) + B2( I, J ) = B1( I, J ) END DO END DO * @@ -318,24 +347,24 @@ SUBROUTINE DDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = B2( I, J ) - B1( I, J ) + B1( I, J ) = B2( I, J ) - B1( I, J ) END DO END DO * - RESULT(1) = DLANGE( 'I', M, N, B1, LDA, + RESULT( 1 ) = DLANGE( 'I', M, N, B1, LDA, + D_WORK_DLANGE ) * - RESULT(1) = RESULT(1) / SQRT( EPS ) - + / MAX ( MAX( M, N), 1 ) + RESULT( 1 ) = RESULT( 1 ) / SQRT( EPS ) + + / MAX ( MAX( M, N ), 1 ) * - IF( RESULT(1).GE.THRESH ) THEN + IF( RESULT( 1 ).GE.THRESH ) THEN IF( NFAIL.EQ.0 ) THEN WRITE( NOUT, * ) WRITE( NOUT, FMT = 9999 ) END IF WRITE( NOUT, FMT = 9997 ) 'DTFSM', + CFORM, SIDE, UPLO, TRANS, DIAG, M, - + N, RESULT(1) + + N, RESULT( 1 ) NFAIL = NFAIL + 1 END IF * diff --git a/TESTING/LIN/derrls.f b/TESTING/LIN/derrls.f index a1f74dec23..09d745238e 100644 --- a/TESTING/LIN/derrls.f +++ b/TESTING/LIN/derrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> DERRLS tests the error exits for the DOUBLE PRECISION least squares -*> driver routines (DGELS, SGELSS, SGELSY, SGELSD). +*> driver routines (DGELS, DGELST, DGETSLS, SGELSS, SGELSY, SGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ SUBROUTINE DERRLS( PATH, NUNIT ) EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, DGELS, DGELSD, DGELSS, DGELSY + EXTERNAL ALAESM, CHKXER, DGELS, DGELSD, DGELSS, DGELST, + $ DGELSY, DGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ SUBROUTINE DERRLS( PATH, NUNIT ) INFOT = 8 CALL DGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'DGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'DGELS ', INFOT, NOUT, LERR, OK ) * +* DGELST +* + SRNAMT = 'DGELST' + INFOT = 1 + CALL DGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) +* +* DGETSLS +* + SRNAMT = 'DGETSLS' + INFOT = 1 + CALL DGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) +* * DGELSS * SRNAMT = 'DGELSS' diff --git a/TESTING/LIN/schktr.f b/TESTING/LIN/schktr.f index 5aeb1ce88c..92d8761087 100644 --- a/TESTING/LIN/schktr.f +++ b/TESTING/LIN/schktr.f @@ -555,11 +555,11 @@ SUBROUTINE SCHKTR( DOTYPE, NN, NVAL, NNB, NBVAL, NNS, NSVAL, * IF( INFO.NE.0 ) $ CALL ALAERH( PATH, 'SLATRS3', INFO, 0, - $ UPLO // TRANS // DIAG // 'Y', N, N, + $ UPLO // TRANS // DIAG // 'N', N, N, $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) * CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, - $ SCALE3 ( 1 ), RWORK, ONE, B( N+1 ), LDA, + $ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA, $ X, LDA, WORK, RESULT( 10 ) ) CALL SSCAL( N, BIGNUM, X, 1 ) CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, diff --git a/TESTING/LIN/sdrvls.f b/TESTING/LIN/sdrvls.f index b964515037..2baf9a3fb1 100644 --- a/TESTING/LIN/sdrvls.f +++ b/TESTING/LIN/sdrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> SDRVLS tests the least squares driver routines SGELS, SGETSLS, SGELSS, SGELSY, -*> and SGELSD. +*> SDRVLS tests the least squares driver routines SGELS, SGELST, +*> SGETSLS, SGELSS, SGELSY and SGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) REAL ONE, TWO, ZERO @@ -225,8 +225,8 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, - $ LWORK_SGELS, LWORK_SGETSLS, LWORK_SGELSS, - $ LWORK_SGELSY, LWORK_SGELSD + $ LWORK_SGELS, LWORK_SGELST, LWORK_SGETSLS, + $ LWORK_SGELSS, LWORK_SGELSY, LWORK_SGELSD REAL EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. @@ -243,12 +243,12 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, SAXPY, SERRLS, SGELS, - $ SGELSD, SGELSS, SGELSY, SGEMM, SLACPY, - $ SLARNV, SQRT13, SQRT15, SQRT16, SSCAL, - $ XLAENV, SGETSLS + $ SGELSD, SGELSS, SGELST, SGELSY, SGEMM, + $ SGETSLS, SLACPY, SLARNV, SQRT13, SQRT15, + $ SQRT16, SSCAL, XLAENV * .. * .. Intrinsic Functions .. - INTRINSIC INT, LOG, MAX, MIN, REAL, SQRT + INTRINSIC INT, MAX, MIN, REAL, SQRT * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -330,7 +330,8 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -357,6 +358,10 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL SGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ( 1 ), -1, INFO ) LWORK_SGELS = INT ( WQ( 1 ) ) +* Compute workspace needed for SGELST + CALL SGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_SGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for SGETSLS CALL SGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ( 1 ), -1, INFO ) @@ -378,9 +383,9 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Compute LIWORK workspace needed for SGELSY and SGELSD LIWORK = MAX( LIWORK, N, IWQ( 1 ) ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_SGELS, LWORK_SGETSLS, - $ LWORK_SGELSY, LWORK_SGELSS, - $ LWORK_SGELSD ) + LWORK = MAX( LWORK, LWORK_SGELS, LWORK_SGELST, + $ LWORK_SGETSLS, LWORK_SGELSY, + $ LWORK_SGELSS, LWORK_SGELSD ) END IF ENDDO ENDDO @@ -411,21 +416,26 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 110 -* +* ===================================================== +* Begin test SGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test SGELS -* * Generate a matrix of scaling type ISCALE * CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -469,20 +479,27 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for SGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL SQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for SGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * RESULT( 2 ) = SQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, @@ -500,7 +517,7 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -509,26 +526,33 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test SGETSLS + END DO + END DO + END IF +* ===================================================== +* End test SGELS +* ===================================================== +* ===================================================== +* Begin test SGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) -* - DO 60 ITRAN = 1, 2 +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -563,31 +587,38 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'SGETSLS ' - CALL SGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'SGELST' + CALL SGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'SGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'SGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for SGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL SQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, - $ RESULT( 15 ) ) + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for SGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * - RESULT( 16 ) = SQRT17( TRANS, 1, M, N, + RESULT( 4 ) = SQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -595,7 +626,7 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * Solving overdetermined system * - RESULT( 16 ) = SQRT14( TRANS, M, N, + RESULT( 4 ) = SQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -603,21 +634,151 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 ) TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO END IF +* ===================================================== +* End test SGELST +* ===================================================== +* ===================================================== +* Begin test SGETSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO IMB = 1, NNB + MB = NBVAL( IMB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'T' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL SLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL SSCAL( NCOLS*NRHS, + $ ONE / REAL( NCOLS ), + $ WORK, 1 ) + END IF + CALL SGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, ONE, + $ COPYA, LDA, WORK, LDWORK, + $ ZERO, B, LDB ) + CALL SLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL SLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL SLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'SGETSLS' + CALL SGETSLS( TRANS, M, N, NRHS, + $ A, LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'SGETSLS', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for SGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL SLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL SQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for SGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = SQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = SQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, + $ B, LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 ) TRANS, + $ M, N, NRHS, MB, NB, ITYPE, + $ K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO + END IF +* ===================================================== +* End test SGETSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -662,37 +823,37 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = SQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = SQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 4 ) ) + $ WORK( M*NRHS+1 ), RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -716,38 +877,38 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 8 ) ) + $ WORK( M*NRHS+1 ), RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -776,45 +937,45 @@ SUBROUTINE SDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 12 ) ) + $ WORK( M*NRHS+1 ), RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 90 K = 3, 14 + DO 90 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) diff --git a/TESTING/LIN/sdrvrf3.f b/TESTING/LIN/sdrvrf3.f index 5faae27337..bc01d8473b 100644 --- a/TESTING/LIN/sdrvrf3.f +++ b/TESTING/LIN/sdrvrf3.f @@ -153,8 +153,9 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, REAL RESULT( NTESTS ) * .. * .. External Functions .. + LOGICAL LSAME REAL SLAMCH, SLANGE, SLARND - EXTERNAL SLAMCH, SLANGE, SLARND + EXTERNAL SLAMCH, SLANGE, SLARND, LSAME * .. * .. External Subroutines .. EXTERNAL STRTTF, SGEQRF, SGEQLF, STFSM, STRSM @@ -218,9 +219,9 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO 100 IALPHA = 1, 3 * - IF ( IALPHA.EQ. 1) THEN + IF ( IALPHA.EQ.1 ) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 2) THEN + ELSE IF ( IALPHA.EQ.2 ) THEN ALPHA = ONE ELSE ALPHA = SLARND( 2, ISEED ) @@ -259,7 +260,7 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, NA DO I = 1, NA - A( I, J) = SLARND( 2, ISEED ) + A( I, J ) = SLARND( 2, ISEED ) END DO END DO * @@ -272,6 +273,20 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL SGEQRF( NA, NA, A, LDA, TAU, + S_WORK_SGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO J = 1, NA + DO I = 1, J + A( I, J ) = A( I, J ) / + + ( 2.0 * A( J, J ) ) + END DO + END DO + END IF +* ELSE * * The case IUPLO.EQ.2 is when SIDE.EQ.'L' @@ -281,6 +296,20 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL SGELQF( NA, NA, A, LDA, TAU, + S_WORK_SGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO I = 1, NA + DO J = 1, I + A( I, J ) = A( I, J ) / + + ( 2.0 * A( I, I ) ) + END DO + END DO + END IF +* END IF * * Store a copy of A in RFP format (in ARF). @@ -294,8 +323,8 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = SLARND( 2, ISEED ) - B2( I, J) = B1( I, J) + B1( I, J ) = SLARND( 2, ISEED ) + B2( I, J ) = B1( I, J ) END DO END DO * @@ -318,24 +347,24 @@ SUBROUTINE SDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = B2( I, J ) - B1( I, J ) + B1( I, J ) = B2( I, J ) - B1( I, J ) END DO END DO * - RESULT(1) = SLANGE( 'I', M, N, B1, LDA, + RESULT( 1 ) = SLANGE( 'I', M, N, B1, LDA, + S_WORK_SLANGE ) * - RESULT(1) = RESULT(1) / SQRT( EPS ) - + / MAX ( MAX( M, N), 1 ) + RESULT( 1 ) = RESULT( 1 ) / SQRT( EPS ) + + / MAX ( MAX( M, N ), 1 ) * - IF( RESULT(1).GE.THRESH ) THEN + IF( RESULT( 1 ).GE.THRESH ) THEN IF( NFAIL.EQ.0 ) THEN WRITE( NOUT, * ) WRITE( NOUT, FMT = 9999 ) END IF WRITE( NOUT, FMT = 9997 ) 'STFSM', + CFORM, SIDE, UPLO, TRANS, DIAG, M, - + N, RESULT(1) + + N, RESULT( 1 ) NFAIL = NFAIL + 1 END IF * diff --git a/TESTING/LIN/serrls.f b/TESTING/LIN/serrls.f index e6ee4360f9..6c4820066a 100644 --- a/TESTING/LIN/serrls.f +++ b/TESTING/LIN/serrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> SERRLS tests the error exits for the REAL least squares -*> driver routines (SGELS, SGELSS, SGELSY, SGELSD). +*> driver routines (SGELS, SGELST, SGETSLS, SGELSS, SGELSY, SGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ SUBROUTINE SERRLS( PATH, NUNIT ) EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, SGELS, SGELSD, SGELSS, SGELSY + EXTERNAL ALAESM, CHKXER, SGELS, SGELSD, SGELSS, SGELST, + $ SGELSY, SGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ SUBROUTINE SERRLS( PATH, NUNIT ) INFOT = 8 CALL SGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'SGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'SGELS ', INFOT, NOUT, LERR, OK ) * +* SGELST +* + SRNAMT = 'SGELST' + INFOT = 1 + CALL SGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) +* +* SGETSLS +* + SRNAMT = 'SGETSLS' + INFOT = 1 + CALL SGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) +* * SGELSS * SRNAMT = 'SGELSS' diff --git a/TESTING/LIN/zdrvls.f b/TESTING/LIN/zdrvls.f index 2eab979054..b21345d302 100644 --- a/TESTING/LIN/zdrvls.f +++ b/TESTING/LIN/zdrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> ZDRVLS tests the least squares driver routines ZGELS, ZGETSLS, ZGELSS, ZGELSY -*> and ZGELSD. +*> ZDRVLS tests the least squares driver routines ZGELS, ZGELST, +*> ZGETSLS, ZGELSS, ZGELSY and ZGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) DOUBLE PRECISION ONE, ZERO @@ -228,8 +228,8 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, LRWORK, - $ LWORK_ZGELS, LWORK_ZGETSLS, LWORK_ZGELSS, - $ LWORK_ZGELSY, LWORK_ZGELSD, + $ LWORK_ZGELS, LWORK_ZGELST, LWORK_ZGETSLS, + $ LWORK_ZGELSS, LWORK_ZGELSY, LWORK_ZGELSD, $ LRWORK_ZGELSY, LRWORK_ZGELSS, LRWORK_ZGELSD DOUBLE PRECISION EPS, NORMA, NORMB, RCOND * .. @@ -248,10 +248,10 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, EXTERNAL DASUM, DLAMCH, ZQRT12, ZQRT14, ZQRT17 * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, DLASRT, XLAENV, - $ ZDSCAL, ZERRLS, ZGELS, ZGELSD, ZGELSS, - $ ZGELSY, ZGEMM, ZLACPY, ZLARNV, ZQRT13, ZQRT15, - $ ZQRT16, ZGETSLS + EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, ZERRLS, ZGELS, + $ ZGELSD, ZGELSS, ZGELST, ZGELSY, ZGEMM, + $ ZGETSLS, ZLACPY, ZLARNV, ZQRT13, ZQRT15, + $ ZQRT16, ZDSCAL, XLAENV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, INT, SQRT @@ -334,7 +334,8 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -361,6 +362,10 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL ZGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_ZGELS = INT ( WQ( 1 ) ) +* Compute workspace needed for ZGELST + CALL ZGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_ZGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for ZGETSLS CALL ZGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -390,9 +395,9 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, LRWORK = MAX( LRWORK, LRWORK_ZGELSY, $ LRWORK_ZGELSS, LRWORK_ZGELSD ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_ZGELS, LWORK_ZGETSLS, - $ LWORK_ZGELSY, LWORK_ZGELSS, - $ LWORK_ZGELSD ) + LWORK = MAX( LWORK, LWORK_ZGELS, LWORK_ZGELST, + $ LWORK_ZGETSLS, LWORK_ZGELSY, + $ LWORK_ZGELSS, LWORK_ZGELSD ) END IF ENDDO ENDDO @@ -425,21 +430,26 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 100 -* +* ===================================================== +* Begin test ZGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test ZGELS -* * Generate a matrix of scaling type ISCALE * CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -484,15 +494,20 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for ZGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL ZQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, RWORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for ZGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN @@ -515,7 +530,7 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -524,26 +539,34 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test ZGETSLS + END DO + END DO + END IF +* ===================================================== +* End test ZGELS +* ===================================================== +* ===================================================== +* Begin test ZGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) + CALL XLAENV( 3, NXVAL( INB ) ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -560,9 +583,9 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, IF( NCOLS.GT.0 ) THEN CALL ZLARNV( 2, ISEED, NCOLS*NRHS, $ WORK ) - CALL ZSCAL( NCOLS*NRHS, - $ CONE / DBLE( NCOLS ), WORK, - $ 1 ) + CALL ZDSCAL( NCOLS*NRHS, + $ ONE / DBLE( NCOLS ), WORK, + $ 1 ) END IF CALL ZGEMM( TRANS, 'No transpose', NROWS, $ NRHS, NCOLS, CONE, COPYA, LDA, @@ -578,31 +601,37 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'ZGETSLS ' - CALL ZGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'ZGELST' + CALL ZGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) +* IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'ZGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'ZGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for ZGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL ZQRT16( TRANS, M, N, NRHS, COPYA, - $ LDA, B, LDB, C, LDB, WORK2, - $ RESULT( 15 ) ) + $ LDA, B, LDB, C, LDB, RWORK, + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for ZGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * * Solving LS system * - RESULT( 16 ) = ZQRT17( TRANS, 1, M, N, + RESULT( 4 ) = ZQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -610,7 +639,7 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * Solving overdetermined system * - RESULT( 16 ) = ZQRT14( TRANS, M, N, + RESULT( 4 ) = ZQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -618,21 +647,151 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 )TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO + END IF +* ===================================================== +* End test ZGELST +* ===================================================== +* ===================================================== +* Begin test ZGELSTSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO INB = 1, NNB + MB = NBVAL( INB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO IMB = 1, NNB + NB = NBVAL( IMB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'C' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL ZLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL ZSCAL( NCOLS*NRHS, + $ CONE / DBLE( NCOLS ), + $ WORK, 1 ) + END IF + CALL ZGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, CONE, + $ COPYA, LDA, WORK, LDWORK, + $ CZERO, B, LDB ) + CALL ZLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL ZLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL ZLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'ZGETSLS ' + CALL ZGETSLS( TRANS, M, N, NRHS, A, + $ LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'ZGETSLS ', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for ZGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL ZLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL ZQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK2, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for ZGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = ZQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = ZQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, B, + $ LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 )TRANS, + $ M, N, NRHS, MB, NB, ITYPE, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO END IF +* ===================================================== +* End test ZGELSTSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -680,37 +839,37 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * * workspace used: 2*MNMIN+NB*NB+NB*MAX(N,NRHS) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = ZQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = ZQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK, RWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 4 ) ) + $ RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -736,38 +895,38 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 8 ) ) + $ RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -792,45 +951,45 @@ SUBROUTINE ZDRVLS( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB, $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 12 ) ) + $ RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 80 K = 3, 14 + DO 80 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) diff --git a/TESTING/LIN/zdrvrf3.f b/TESTING/LIN/zdrvrf3.f index 7a44dba29f..4e55b03ef7 100644 --- a/TESTING/LIN/zdrvrf3.f +++ b/TESTING/LIN/zdrvrf3.f @@ -156,9 +156,10 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Functions .. + LOGICAL LSAME DOUBLE PRECISION DLAMCH, ZLANGE COMPLEX*16 ZLARND - EXTERNAL DLAMCH, ZLARND, ZLANGE + EXTERNAL DLAMCH, ZLARND, ZLANGE, LSAME * .. * .. External Subroutines .. EXTERNAL ZTRTTF, ZGEQRF, ZGEQLF, ZTFSM, ZTRSM @@ -222,9 +223,9 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO 100 IALPHA = 1, 3 * - IF ( IALPHA.EQ. 1) THEN + IF ( IALPHA.EQ.1 ) THEN ALPHA = ZERO - ELSE IF ( IALPHA.EQ. 2) THEN + ELSE IF ( IALPHA.EQ.2 ) THEN ALPHA = ONE ELSE ALPHA = ZLARND( 4, ISEED ) @@ -263,7 +264,7 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, NA DO I = 1, NA - A( I, J) = ZLARND( 4, ISEED ) + A( I, J ) = ZLARND( 4, ISEED ) END DO END DO * @@ -276,6 +277,20 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL ZGEQRF( NA, NA, A, LDA, TAU, + Z_WORK_ZGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO J = 1, NA + DO I = 1, J + A( I, J ) = A( I, J ) / + + ( 2.0 * A( J, J ) ) + END DO + END DO + END IF +* ELSE * * The case IUPLO.EQ.2 is when SIDE.EQ.'L' @@ -285,6 +300,20 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, CALL ZGELQF( NA, NA, A, LDA, TAU, + Z_WORK_ZGEQRF, LDA, + INFO ) +* +* Forcing main diagonal of test matrix to +* be unit makes it ill-conditioned for +* some test cases +* + IF ( LSAME( DIAG, 'U' ) ) THEN + DO I = 1, NA + DO J = 1, I + A( I, J ) = A( I, J ) / + + ( 2.0 * A( I, I ) ) + END DO + END DO + END IF +* END IF * * After the QR factorization, the diagonal @@ -293,7 +322,8 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * value 1.0E+00. * DO J = 1, NA - A( J, J) = A(J,J) * ZLARND( 5, ISEED ) + A( J, J ) = A( J, J ) * + + ZLARND( 5, ISEED ) END DO * * Store a copy of A in RFP format (in ARF). @@ -307,8 +337,8 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = ZLARND( 4, ISEED ) - B2( I, J) = B1( I, J) + B1( I, J ) = ZLARND( 4, ISEED ) + B2( I, J ) = B1( I, J ) END DO END DO * @@ -331,24 +361,24 @@ SUBROUTINE ZDRVRF3( NOUT, NN, NVAL, THRESH, A, LDA, ARF, B1, B2, * DO J = 1, N DO I = 1, M - B1( I, J) = B2( I, J ) - B1( I, J ) + B1( I, J ) = B2( I, J ) - B1( I, J ) END DO END DO * - RESULT(1) = ZLANGE( 'I', M, N, B1, LDA, + RESULT( 1 ) = ZLANGE( 'I', M, N, B1, LDA, + D_WORK_ZLANGE ) * - RESULT(1) = RESULT(1) / SQRT( EPS ) - + / MAX ( MAX( M, N), 1 ) + RESULT( 1 ) = RESULT( 1 ) / SQRT( EPS ) + + / MAX ( MAX( M, N ), 1 ) * - IF( RESULT(1).GE.THRESH ) THEN + IF( RESULT( 1 ).GE.THRESH ) THEN IF( NFAIL.EQ.0 ) THEN WRITE( NOUT, * ) WRITE( NOUT, FMT = 9999 ) END IF WRITE( NOUT, FMT = 9997 ) 'ZTFSM', + CFORM, SIDE, UPLO, TRANS, DIAG, M, - + N, RESULT(1) + + N, RESULT( 1 ) NFAIL = NFAIL + 1 END IF * diff --git a/TESTING/LIN/zerrls.f b/TESTING/LIN/zerrls.f index 66e56c8c6b..22f049ee06 100644 --- a/TESTING/LIN/zerrls.f +++ b/TESTING/LIN/zerrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> ZERRLS tests the error exits for the COMPLEX*16 least squares -*> driver routines (ZGELS, CGELSS, CGELSY, CGELSD). +*> driver routines (ZGELS, ZGELST, ZGETSLS, CGELSS, CGELSY, CGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ SUBROUTINE ZERRLS( PATH, NUNIT ) EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, ZGELS, ZGELSD, ZGELSS, ZGELSY + EXTERNAL ALAESM, CHKXER, ZGELS, ZGELSD, ZGELSS, ZGELST, + $ ZGELSY, ZGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ SUBROUTINE ZERRLS( PATH, NUNIT ) INFOT = 8 CALL ZGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'ZGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'ZGELS ', INFOT, NOUT, LERR, OK ) * +* ZGELST +* + SRNAMT = 'ZGELST' + INFOT = 1 + CALL ZGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) +* +* ZGETSLS +* + SRNAMT = 'ZGETSLS' + INFOT = 1 + CALL ZGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) +* * ZGELSS * SRNAMT = 'ZGELSS'